2005-04-17 02:20:36 +04:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* PF_INET protocol family socket handler .
*
2005-05-06 03:16:16 +04:00
* Authors : Ross Biro
2005-04-17 02:20:36 +04:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
* Florian La Roche , < flla @ stud . uni - sb . de >
* Alan Cox , < A . Cox @ swansea . ac . uk >
*
* Changes ( see also sock . c )
*
* piggy ,
* Karl Knutson : Socket protocol table
* A . N . Kuznetsov : Socket death error in accept ( ) .
* John Richardson : Fix non blocking error in connect ( )
* so sockets that fail to connect
* don ' t return - EINPROGRESS .
* Alan Cox : Asynchronous I / O support
* Alan Cox : Keep correct socket pointer on sock
* structures
* when accept ( ) ed
* Alan Cox : Semantics of SO_LINGER aren ' t state
* moved to close when you look carefully .
* With this fixed and the accept bug fixed
* some RPC stuff seems happier .
* Niibe Yutaka : 4.4 BSD style write async I / O
* Alan Cox ,
* Tony Gale : Fixed reuse semantics .
* Alan Cox : bind ( ) shouldn ' t abort existing but dead
* sockets . Stops FTP netin : . . I hope .
* Alan Cox : bind ( ) works correctly for RAW sockets .
* Note that FreeBSD at least was broken
* in this respect so be careful with
* compatibility tests . . .
* Alan Cox : routing cache support
* Alan Cox : memzero the socket structure for
* compactness .
* Matt Day : nonblock connect error handler
* Alan Cox : Allow large numbers of pending sockets
* ( eg for big web sites ) , but only if
* specifically application requested .
* Alan Cox : New buffering throughout IP . Used
* dumbly .
* Alan Cox : New buffering now used smartly .
* Alan Cox : BSD rather than common sense
* interpretation of listen .
* Germano Caronni : Assorted small races .
* Alan Cox : sendmsg / recvmsg basic support .
* Alan Cox : Only sendmsg / recvmsg now supported .
* Alan Cox : Locked down bind ( see security list ) .
* Alan Cox : Loosened bind a little .
* Mike McLagan : ADD / DEL DLCI Ioctls
* Willy Konynenberg : Transparent proxying support .
* David S . Miller : New socket lookup architecture .
* Some other random speedups .
* Cyrus Durgin : Cleaned up file for kmod hacks .
* Andi Kleen : Fix inet_stream_connect TCP race .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
2006-06-22 14:02:40 +04:00
# include <linux/err.h>
2005-04-17 02:20:36 +04:00
# include <linux/errno.h>
# include <linux/types.h>
# include <linux/socket.h>
# include <linux/in.h>
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/sched.h>
# include <linux/timer.h>
# include <linux/string.h>
# include <linux/sockios.h>
# include <linux/net.h>
2006-01-11 23:17:47 +03:00
# include <linux/capability.h>
2005-04-17 02:20:36 +04:00
# include <linux/fcntl.h>
# include <linux/mm.h>
# include <linux/interrupt.h>
# include <linux/stat.h>
# include <linux/init.h>
# include <linux/poll.h>
# include <linux/netfilter_ipv4.h>
2007-03-23 21:40:27 +03:00
# include <linux/random.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/system.h>
# include <linux/inet.h>
# include <linux/igmp.h>
2005-12-27 07:43:12 +03:00
# include <linux/inetdevice.h>
2005-04-17 02:20:36 +04:00
# include <linux/netdevice.h>
2008-12-16 10:41:09 +03:00
# include <net/checksum.h>
2005-04-17 02:20:36 +04:00
# include <net/ip.h>
# include <net/protocol.h>
# include <net/arp.h>
# include <net/route.h>
# include <net/ip_fib.h>
2005-08-10 07:11:56 +04:00
# include <net/inet_connection_sock.h>
2005-04-17 02:20:36 +04:00
# include <net/tcp.h>
# include <net/udp.h>
2006-11-27 22:10:57 +03:00
# include <net/udplite.h>
2005-04-17 02:20:36 +04:00
# include <linux/skbuff.h>
# include <net/sock.h>
# include <net/raw.h>
# include <net/icmp.h>
# include <net/ipip.h>
# include <net/inet_common.h>
# include <net/xfrm.h>
2008-07-18 15:01:44 +04:00
# include <net/net_namespace.h>
2005-04-17 02:20:36 +04:00
# ifdef CONFIG_IP_MROUTE
# include <linux/mroute.h>
# endif
/* The inetsw table contains everything that inet_create needs to
* build a new socket .
*/
/* One list of registered inet_protosw entries per socket type (indexed by sock->type). */
static struct list_head inetsw [ SOCK_MAX ] ;
/* Taken by inet_register_protosw()/inet_unregister_protosw() around list
 * updates, and reused by build_ehash_secret() to publish the hash secret.
 */
static DEFINE_SPINLOCK ( inetsw_lock ) ;
2007-12-05 12:38:23 +03:00
/* Global IPv4 settings; inet_create() consults no_pmtu_disc to choose the
 * initial path-MTU discovery mode of every new socket.
 */
struct ipv4_config ipv4_config ;
EXPORT_SYMBOL ( ipv4_config ) ;
2005-04-17 02:20:36 +04:00
/* New destruction routine */
void inet_sock_destruct ( struct sock * sk )
{
struct inet_sock * inet = inet_sk ( sk ) ;
__skb_queue_purge ( & sk - > sk_receive_queue ) ;
__skb_queue_purge ( & sk - > sk_error_queue ) ;
2007-12-31 11:29:24 +03:00
sk_mem_reclaim ( sk ) ;
2005-04-17 02:20:36 +04:00
if ( sk - > sk_type = = SOCK_STREAM & & sk - > sk_state ! = TCP_CLOSE ) {
2009-08-29 10:45:21 +04:00
pr_err ( " Attempt to release TCP socket in state %d %p \n " ,
2005-04-17 02:20:36 +04:00
sk - > sk_state , sk ) ;
return ;
}
if ( ! sock_flag ( sk , SOCK_DEAD ) ) {
2009-08-29 10:45:21 +04:00
pr_err ( " Attempt to release alive inet socket %p \n " , sk ) ;
2005-04-17 02:20:36 +04:00
return ;
}
2008-07-26 08:43:18 +04:00
WARN_ON ( atomic_read ( & sk - > sk_rmem_alloc ) ) ;
WARN_ON ( atomic_read ( & sk - > sk_wmem_alloc ) ) ;
WARN_ON ( sk - > sk_wmem_queued ) ;
WARN_ON ( sk - > sk_forward_alloc ) ;
2005-04-17 02:20:36 +04:00
2005-11-08 20:41:34 +03:00
kfree ( inet - > opt ) ;
2005-04-17 02:20:36 +04:00
dst_release ( sk - > sk_dst_cache ) ;
2005-08-10 06:45:38 +04:00
sk_refcnt_debug_dec ( sk ) ;
2005-04-17 02:20:36 +04:00
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_sock_destruct ) ;
2005-04-17 02:20:36 +04:00
/*
* The routines beyond this point handle the behaviour of an AF_INET
* socket object . Mostly it punts to the subprotocols of IP to do
* the work .
*/
/*
* Automatically bind an unbound socket .
*/
static int inet_autobind ( struct sock * sk )
{
struct inet_sock * inet ;
/* We may need to bind the socket. */
lock_sock ( sk ) ;
inet = inet_sk ( sk ) ;
if ( ! inet - > num ) {
if ( sk - > sk_prot - > get_port ( sk , 0 ) ) {
release_sock ( sk ) ;
return - EAGAIN ;
}
inet - > sport = htons ( inet - > num ) ;
}
release_sock ( sk ) ;
return 0 ;
}
/*
* Move a socket into listening state .
*/
int inet_listen ( struct socket * sock , int backlog )
{
struct sock * sk = sock - > sk ;
unsigned char old_state ;
int err ;
lock_sock ( sk ) ;
err = - EINVAL ;
if ( sock - > state ! = SS_UNCONNECTED | | sock - > type ! = SOCK_STREAM )
goto out ;
old_state = sk - > sk_state ;
if ( ! ( ( 1 < < old_state ) & ( TCPF_CLOSE | TCPF_LISTEN ) ) )
goto out ;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted .
*/
if ( old_state ! = TCP_LISTEN ) {
2006-11-16 13:30:37 +03:00
err = inet_csk_listen_start ( sk , backlog ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
}
sk - > sk_max_ack_backlog = backlog ;
err = 0 ;
out :
release_sock ( sk ) ;
return err ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_listen ) ;
2005-04-17 02:20:36 +04:00
2007-03-28 00:53:04 +04:00
/* Random, non-zero secret; zero means "not generated yet" and makes
 * inet_create() call build_ehash_secret().
 */
u32 inet_ehash_secret __read_mostly ;
2007-03-23 21:40:27 +03:00
EXPORT_SYMBOL ( inet_ehash_secret ) ;
2007-03-28 00:53:04 +04:00
/*
* inet_ehash_secret must be set exactly once
* Instead of using a dedicated spinlock , we ( ab ) use inetsw_lock
*/
2007-03-23 21:40:27 +03:00
void build_ehash_secret ( void )
{
2007-03-28 00:53:04 +04:00
u32 rnd ;
do {
get_random_bytes ( & rnd , sizeof ( rnd ) ) ;
} while ( rnd = = 0 ) ;
spin_lock_bh ( & inetsw_lock ) ;
if ( ! inet_ehash_secret )
inet_ehash_secret = rnd ;
spin_unlock_bh ( & inetsw_lock ) ;
2007-03-23 21:40:27 +03:00
}
EXPORT_SYMBOL ( build_ehash_secret ) ;
2008-03-25 01:33:42 +03:00
static inline int inet_netns_ok ( struct net * net , int protocol )
{
int hash ;
2009-09-14 16:21:47 +04:00
const struct net_protocol * ipprot ;
2008-03-25 01:33:42 +03:00
2008-11-24 02:42:23 +03:00
if ( net_eq ( net , & init_net ) )
2008-03-25 01:33:42 +03:00
return 1 ;
hash = protocol & ( MAX_INET_PROTOS - 1 ) ;
ipprot = rcu_dereference ( inet_protos [ hash ] ) ;
if ( ipprot = = NULL )
/* raw IP is OK */
return 1 ;
return ipprot - > netns_ok ;
}
2005-04-17 02:20:36 +04:00
/*
* Create an inet socket .
*/
2007-10-09 10:24:22 +04:00
static int inet_create ( struct net * net , struct socket * sock , int protocol )
2005-04-17 02:20:36 +04:00
{
struct sock * sk ;
struct inet_protosw * answer ;
struct inet_sock * inet ;
struct proto * answer_prot ;
unsigned char answer_flags ;
char answer_no_check ;
2005-08-10 07:19:14 +04:00
int try_loading_module = 0 ;
2005-12-03 07:43:26 +03:00
int err ;
2005-04-17 02:20:36 +04:00
2008-11-24 02:42:23 +03:00
if ( unlikely ( ! inet_ehash_secret ) )
if ( sock - > type ! = SOCK_RAW & & sock - > type ! = SOCK_DGRAM )
build_ehash_secret ( ) ;
2007-03-23 21:40:27 +03:00
2005-04-17 02:20:36 +04:00
sock - > state = SS_UNCONNECTED ;
/* Look for the requested type/protocol pair. */
2005-08-10 07:19:14 +04:00
lookup_protocol :
2005-12-03 07:43:26 +03:00
err = - ESOCKTNOSUPPORT ;
2005-04-17 02:20:36 +04:00
rcu_read_lock ( ) ;
2008-07-25 12:45:34 +04:00
list_for_each_entry_rcu ( answer , & inetsw [ sock - > type ] , list ) {
2005-04-17 02:20:36 +04:00
2008-07-25 12:45:34 +04:00
err = 0 ;
2005-04-17 02:20:36 +04:00
/* Check the non-wild match. */
if ( protocol = = answer - > protocol ) {
if ( protocol ! = IPPROTO_IP )
break ;
} else {
/* Check for the two wild cases. */
if ( IPPROTO_IP = = protocol ) {
protocol = answer - > protocol ;
break ;
}
if ( IPPROTO_IP = = answer - > protocol )
break ;
}
2005-12-03 07:43:26 +03:00
err = - EPROTONOSUPPORT ;
2005-04-17 02:20:36 +04:00
}
2008-07-25 12:45:34 +04:00
if ( unlikely ( err ) ) {
2005-08-10 07:19:14 +04:00
if ( try_loading_module < 2 ) {
rcu_read_unlock ( ) ;
/*
* Be more specific , e . g . net - pf - 2 - proto - 132 - type - 1
* ( net - pf - PF_INET - proto - IPPROTO_SCTP - type - SOCK_STREAM )
*/
if ( + + try_loading_module = = 1 )
request_module ( " net-pf-%d-proto-%d-type-%d " ,
PF_INET , protocol , sock - > type ) ;
/*
* Fall back to generic , e . g . net - pf - 2 - proto - 132
* ( net - pf - PF_INET - proto - IPPROTO_SCTP )
*/
else
request_module ( " net-pf-%d-proto-%d " ,
PF_INET , protocol ) ;
goto lookup_protocol ;
} else
goto out_rcu_unlock ;
}
2005-04-17 02:20:36 +04:00
err = - EPERM ;
if ( answer - > capability > 0 & & ! capable ( answer - > capability ) )
goto out_rcu_unlock ;
2008-03-25 01:33:42 +03:00
err = - EAFNOSUPPORT ;
if ( ! inet_netns_ok ( net , protocol ) )
goto out_rcu_unlock ;
2005-04-17 02:20:36 +04:00
sock - > ops = answer - > ops ;
answer_prot = answer - > prot ;
answer_no_check = answer - > no_check ;
answer_flags = answer - > flags ;
rcu_read_unlock ( ) ;
2008-07-26 08:43:18 +04:00
WARN_ON ( answer_prot - > slab = = NULL ) ;
2005-04-17 02:20:36 +04:00
err = - ENOBUFS ;
2007-11-01 10:39:31 +03:00
sk = sk_alloc ( net , PF_INET , GFP_KERNEL , answer_prot ) ;
2005-04-17 02:20:36 +04:00
if ( sk = = NULL )
goto out ;
err = 0 ;
sk - > sk_no_check = answer_no_check ;
if ( INET_PROTOSW_REUSE & answer_flags )
sk - > sk_reuse = 1 ;
inet = inet_sk ( sk ) ;
2007-01-10 01:37:06 +03:00
inet - > is_icsk = ( INET_PROTOSW_ICSK & answer_flags ) ! = 0 ;
2005-04-17 02:20:36 +04:00
if ( SOCK_RAW = = sock - > type ) {
inet - > num = protocol ;
if ( IPPROTO_RAW = = protocol )
inet - > hdrincl = 1 ;
}
if ( ipv4_config . no_pmtu_disc )
inet - > pmtudisc = IP_PMTUDISC_DONT ;
else
inet - > pmtudisc = IP_PMTUDISC_WANT ;
inet - > id = 0 ;
sock_init_data ( sock , sk ) ;
sk - > sk_destruct = inet_sock_destruct ;
sk - > sk_protocol = protocol ;
sk - > sk_backlog_rcv = sk - > sk_prot - > backlog_rcv ;
inet - > uc_ttl = - 1 ;
inet - > mc_loop = 1 ;
inet - > mc_ttl = 1 ;
2009-05-28 11:00:46 +04:00
inet - > mc_all = 1 ;
2005-04-17 02:20:36 +04:00
inet - > mc_index = 0 ;
inet - > mc_list = NULL ;
2005-08-10 06:45:38 +04:00
sk_refcnt_debug_inc ( sk ) ;
2005-04-17 02:20:36 +04:00
if ( inet - > num ) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares .
*/
inet - > sport = htons ( inet - > num ) ;
/* Add to protocol hash chains. */
sk - > sk_prot - > hash ( sk ) ;
}
if ( sk - > sk_prot - > init ) {
err = sk - > sk_prot - > init ( sk ) ;
if ( err )
sk_common_release ( sk ) ;
}
out :
return err ;
out_rcu_unlock :
rcu_read_unlock ( ) ;
goto out ;
}
/*
* The peer socket should always be NULL ( or else ) . When we call this
* function we are destroying the object and from then on nobody
* should refer to it .
*/
int inet_release ( struct socket * sock )
{
struct sock * sk = sock - > sk ;
if ( sk ) {
long timeout ;
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket ( sk ) ;
/* If linger is set, we don't return until the close
* is complete . Otherwise we return immediately . The
* actually closing is done the same either way .
*
* If the close is due to the process exiting , we never
* linger . .
*/
timeout = 0 ;
if ( sock_flag ( sk , SOCK_LINGER ) & &
! ( current - > flags & PF_EXITING ) )
timeout = sk - > sk_lingertime ;
sock - > sk = NULL ;
sk - > sk_prot - > close ( sk , timeout ) ;
}
return 0 ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_release ) ;
2005-04-17 02:20:36 +04:00
/* It is off by default, see below. */
2006-09-23 01:15:41 +04:00
/* When non-zero, inet_bind() skips its "address must be local, multicast
 * or broadcast" check and so allows binding to non-local addresses.
 */
int sysctl_ip_nonlocal_bind __read_mostly ;
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( sysctl_ip_nonlocal_bind ) ;
2005-04-17 02:20:36 +04:00
int inet_bind ( struct socket * sock , struct sockaddr * uaddr , int addr_len )
{
struct sockaddr_in * addr = ( struct sockaddr_in * ) uaddr ;
struct sock * sk = sock - > sk ;
struct inet_sock * inet = inet_sk ( sk ) ;
unsigned short snum ;
int chk_addr_ret ;
int err ;
/* If the socket has its own bind function then use it. (RAW) */
if ( sk - > sk_prot - > bind ) {
err = sk - > sk_prot - > bind ( sk , uaddr , addr_len ) ;
goto out ;
}
err = - EINVAL ;
if ( addr_len < sizeof ( struct sockaddr_in ) )
goto out ;
2008-03-25 20:26:21 +03:00
chk_addr_ret = inet_addr_type ( sock_net ( sk ) , addr - > sin_addr . s_addr ) ;
2005-04-17 02:20:36 +04:00
/* Not specified by any standard per-se, however it breaks too
* many applications when removed . It is unfortunate since
* allowing applications to make a non - local bind solves
* several problems with systems using dynamic addressing .
* ( ie . your servers still start up even if your ISDN link
* is temporarily down )
*/
err = - EADDRNOTAVAIL ;
if ( ! sysctl_ip_nonlocal_bind & &
2008-10-01 18:31:24 +04:00
! ( inet - > freebind | | inet - > transparent ) & &
2008-03-18 08:44:53 +03:00
addr - > sin_addr . s_addr ! = htonl ( INADDR_ANY ) & &
2005-04-17 02:20:36 +04:00
chk_addr_ret ! = RTN_LOCAL & &
chk_addr_ret ! = RTN_MULTICAST & &
chk_addr_ret ! = RTN_BROADCAST )
goto out ;
snum = ntohs ( addr - > sin_port ) ;
err = - EACCES ;
if ( snum & & snum < PROT_SOCK & & ! capable ( CAP_NET_BIND_SERVICE ) )
goto out ;
/* We keep a pair of addresses. rcv_saddr is the one
* used by hash lookups , and saddr is used for transmit .
*
* In the BSD API these are the same except where it
* would be illegal to use them ( multicast / broadcast ) in
* which case the sending device address is used .
*/
lock_sock ( sk ) ;
/* Check these errors (active socket, double bind). */
err = - EINVAL ;
if ( sk - > sk_state ! = TCP_CLOSE | | inet - > num )
goto out_release_sock ;
inet - > rcv_saddr = inet - > saddr = addr - > sin_addr . s_addr ;
if ( chk_addr_ret = = RTN_MULTICAST | | chk_addr_ret = = RTN_BROADCAST )
inet - > saddr = 0 ; /* Use device */
/* Make sure we are allowed to bind here. */
if ( sk - > sk_prot - > get_port ( sk , snum ) ) {
inet - > saddr = inet - > rcv_saddr = 0 ;
err = - EADDRINUSE ;
goto out_release_sock ;
}
if ( inet - > rcv_saddr )
sk - > sk_userlocks | = SOCK_BINDADDR_LOCK ;
if ( snum )
sk - > sk_userlocks | = SOCK_BINDPORT_LOCK ;
inet - > sport = htons ( inet - > num ) ;
inet - > daddr = 0 ;
inet - > dport = 0 ;
sk_dst_reset ( sk ) ;
err = 0 ;
out_release_sock :
release_sock ( sk ) ;
out :
return err ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_bind ) ;
2005-04-17 02:20:36 +04:00
int inet_dgram_connect ( struct socket * sock , struct sockaddr * uaddr ,
int addr_len , int flags )
{
struct sock * sk = sock - > sk ;
if ( uaddr - > sa_family = = AF_UNSPEC )
return sk - > sk_prot - > disconnect ( sk , flags ) ;
if ( ! inet_sk ( sk ) - > num & & inet_autobind ( sk ) )
return - EAGAIN ;
return sk - > sk_prot - > connect ( sk , ( struct sockaddr * ) uaddr , addr_len ) ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_dgram_connect ) ;
2005-04-17 02:20:36 +04:00
/*
 * Sleep until the socket leaves the SYN_SENT/SYN_RECV states, a signal
 * becomes pending for the current task, or the timeout runs out.
 *
 * The socket lock is dropped around each sleep and re-taken afterwards, so
 * the caller must hold it on entry and still holds it on return
 * (inet_stream_connect() does).
 *
 * Returns what is left of @timeo as reported by schedule_timeout().
 */
static long inet_wait_for_connect ( struct sock * sk , long timeo )
{
DEFINE_WAIT ( wait ) ;
/* Queue ourselves on the socket's wait queue before testing the state. */
prepare_to_wait ( sk - > sk_sleep , & wait , TASK_INTERRUPTIBLE ) ;
/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
* Connect() does not allow to get error notifications
* without closing the socket.
*/
while ( ( 1 < < sk - > sk_state ) & ( TCPF_SYN_SENT | TCPF_SYN_RECV ) ) {
/* Drop the socket lock for the duration of the sleep. */
release_sock ( sk ) ;
timeo = schedule_timeout ( timeo ) ;
lock_sock ( sk ) ;
/* Stop on a pending signal or when no timeout is left. */
if ( signal_pending ( current ) | | ! timeo )
break ;
/* Re-arm the wait entry before testing the state again. */
prepare_to_wait ( sk - > sk_sleep , & wait , TASK_INTERRUPTIBLE ) ;
}
finish_wait ( sk - > sk_sleep , & wait ) ;
return timeo ;
}
/*
* Connect to a remote host . There is regrettably still a little
* TCP ' magic ' in here .
*/
int inet_stream_connect ( struct socket * sock , struct sockaddr * uaddr ,
int addr_len , int flags )
{
struct sock * sk = sock - > sk ;
int err ;
long timeo ;
lock_sock ( sk ) ;
if ( uaddr - > sa_family = = AF_UNSPEC ) {
err = sk - > sk_prot - > disconnect ( sk , flags ) ;
sock - > state = err ? SS_DISCONNECTING : SS_UNCONNECTED ;
goto out ;
}
switch ( sock - > state ) {
default :
err = - EINVAL ;
goto out ;
case SS_CONNECTED :
err = - EISCONN ;
goto out ;
case SS_CONNECTING :
err = - EALREADY ;
/* Fall out of switch with err, set for this state */
break ;
case SS_UNCONNECTED :
err = - EISCONN ;
if ( sk - > sk_state ! = TCP_CLOSE )
goto out ;
err = sk - > sk_prot - > connect ( sk , uaddr , addr_len ) ;
if ( err < 0 )
goto out ;
2007-02-09 17:24:47 +03:00
sock - > state = SS_CONNECTING ;
2005-04-17 02:20:36 +04:00
/* Just entered SS_CONNECTING state; the only
* difference is that return value in non - blocking
* case is EINPROGRESS , rather than EALREADY .
*/
err = - EINPROGRESS ;
break ;
}
timeo = sock_sndtimeo ( sk , flags & O_NONBLOCK ) ;
if ( ( 1 < < sk - > sk_state ) & ( TCPF_SYN_SENT | TCPF_SYN_RECV ) ) {
/* Error code is set above */
if ( ! timeo | | ! inet_wait_for_connect ( sk , timeo ) )
goto out ;
err = sock_intr_errno ( timeo ) ;
if ( signal_pending ( current ) )
goto out ;
}
/* Connection was closed by RST, timeout, ICMP error
* or another process disconnected us .
*/
if ( sk - > sk_state = = TCP_CLOSE )
goto sock_error ;
/* sk->sk_err may be not zero now, if RECVERR was ordered by user
* and error was received after socket entered established state .
* Hence , it is handled normally after connect ( ) return successfully .
*/
sock - > state = SS_CONNECTED ;
err = 0 ;
out :
release_sock ( sk ) ;
return err ;
sock_error :
err = sock_error ( sk ) ? : - ECONNABORTED ;
sock - > state = SS_UNCONNECTED ;
if ( sk - > sk_prot - > disconnect ( sk , flags ) )
sock - > state = SS_DISCONNECTING ;
goto out ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_stream_connect ) ;
2005-04-17 02:20:36 +04:00
/*
* Accept a pending connection . The TCP layer now gives BSD semantics .
*/
int inet_accept ( struct socket * sock , struct socket * newsock , int flags )
{
struct sock * sk1 = sock - > sk ;
int err = - EINVAL ;
struct sock * sk2 = sk1 - > sk_prot - > accept ( sk1 , flags , & err ) ;
if ( ! sk2 )
goto do_err ;
lock_sock ( sk2 ) ;
2008-07-26 08:43:18 +04:00
WARN_ON ( ! ( ( 1 < < sk2 - > sk_state ) &
( TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE ) ) ) ;
2005-04-17 02:20:36 +04:00
sock_graft ( sk2 , newsock ) ;
newsock - > state = SS_CONNECTED ;
err = 0 ;
release_sock ( sk2 ) ;
do_err :
return err ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_accept ) ;
2005-04-17 02:20:36 +04:00
/*
* This does both peername and sockname .
*/
int inet_getname ( struct socket * sock , struct sockaddr * uaddr ,
int * uaddr_len , int peer )
{
struct sock * sk = sock - > sk ;
struct inet_sock * inet = inet_sk ( sk ) ;
struct sockaddr_in * sin = ( struct sockaddr_in * ) uaddr ;
sin - > sin_family = AF_INET ;
if ( peer ) {
if ( ! inet - > dport | |
( ( ( 1 < < sk - > sk_state ) & ( TCPF_CLOSE | TCPF_SYN_SENT ) ) & &
peer = = 1 ) )
return - ENOTCONN ;
sin - > sin_port = inet - > dport ;
sin - > sin_addr . s_addr = inet - > daddr ;
} else {
2006-11-15 07:51:49 +03:00
__be32 addr = inet - > rcv_saddr ;
2005-04-17 02:20:36 +04:00
if ( ! addr )
addr = inet - > saddr ;
sin - > sin_port = inet - > sport ;
sin - > sin_addr . s_addr = addr ;
}
memset ( sin - > sin_zero , 0 , sizeof ( sin - > sin_zero ) ) ;
* uaddr_len = sizeof ( * sin ) ;
return 0 ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_getname ) ;
2005-04-17 02:20:36 +04:00
/*
 * Send a message through the protocol's sendmsg() hook, auto-binding the
 * socket to a local port first if it has none.  Returns -EAGAIN when the
 * auto-bind fails, otherwise the hook's result.
 */
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
		 size_t size)
{
	struct sock *sk = sock->sk;

	/* We may need to bind the socket. */
	if (inet_sk(sk)->num == 0) {
		if (inet_autobind(sk) != 0)
			return -EAGAIN;
	}

	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);
2005-04-17 02:20:36 +04:00
2009-08-29 10:45:21 +04:00
/*
 * Send a page, auto-binding the socket first if it has no local port.
 * Protocols without a sendpage() hook go through sock_no_sendpage().
 * Returns -EAGAIN when the auto-bind fails.
 */
static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
			     size_t size, int flags)
{
	struct sock *sk = sock->sk;

	/* We may need to bind the socket. */
	if (inet_sk(sk)->num == 0) {
		if (inet_autobind(sk) != 0)
			return -EAGAIN;
	}

	if (!sk->sk_prot->sendpage)
		return sock_no_sendpage(sock, page, offset, size, flags);

	return sk->sk_prot->sendpage(sk, page, offset, size, flags);
}
int inet_shutdown ( struct socket * sock , int how )
{
struct sock * sk = sock - > sk ;
int err = 0 ;
/* This should really check to make sure
* the socket is a TCP socket . ( WHY AC . . . )
*/
how + + ; /* maps 0->1 has the advantage of making bit 1 rcvs and
1 - > 2 bit 2 snds .
2 - > 3 */
if ( ( how & ~ SHUTDOWN_MASK ) | | ! how ) /* MAXINT->0 */
return - EINVAL ;
lock_sock ( sk ) ;
if ( sock - > state = = SS_CONNECTING ) {
if ( ( 1 < < sk - > sk_state ) &
( TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE ) )
sock - > state = SS_DISCONNECTING ;
else
sock - > state = SS_CONNECTED ;
}
switch ( sk - > sk_state ) {
case TCP_CLOSE :
err = - ENOTCONN ;
/* Hack to wake up other listeners, who can poll for
POLLHUP , even on eg . unconnected UDP sockets - - RR */
default :
sk - > sk_shutdown | = how ;
if ( sk - > sk_prot - > shutdown )
sk - > sk_prot - > shutdown ( sk , how ) ;
break ;
/* Remaining two branches are temporary solution for missing
* close ( ) in multithreaded environment . It is _not_ a good idea ,
* but we have no choice until close ( ) is repaired at VFS level .
*/
case TCP_LISTEN :
if ( ! ( how & RCV_SHUTDOWN ) )
break ;
/* Fall through */
case TCP_SYN_SENT :
err = sk - > sk_prot - > disconnect ( sk , O_NONBLOCK ) ;
sock - > state = err ? SS_DISCONNECTING : SS_UNCONNECTED ;
break ;
}
/* Wake up anyone sleeping in poll. */
sk - > sk_state_change ( sk ) ;
release_sock ( sk ) ;
return err ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_shutdown ) ;
2005-04-17 02:20:36 +04:00
/*
* ioctl ( ) calls you can issue on an INET socket . Most of these are
* device configuration and stuff and very rarely used . Some ioctls
* pass on to the socket itself .
*
* NOTE : I like the idea of a module for the config stuff . ie ifconfig
* loads the devconfigure module does its configuring and unloads it .
* There ' s a good 20 K of config code hanging around the kernel .
*/
int inet_ioctl ( struct socket * sock , unsigned int cmd , unsigned long arg )
{
struct sock * sk = sock - > sk ;
int err = 0 ;
2008-03-25 20:26:21 +03:00
struct net * net = sock_net ( sk ) ;
2005-04-17 02:20:36 +04:00
switch ( cmd ) {
2009-08-29 10:45:21 +04:00
case SIOCGSTAMP :
err = sock_get_timestamp ( sk , ( struct timeval __user * ) arg ) ;
break ;
case SIOCGSTAMPNS :
err = sock_get_timestampns ( sk , ( struct timespec __user * ) arg ) ;
break ;
case SIOCADDRT :
case SIOCDELRT :
case SIOCRTMSG :
err = ip_rt_ioctl ( net , cmd , ( void __user * ) arg ) ;
break ;
case SIOCDARP :
case SIOCGARP :
case SIOCSARP :
err = arp_ioctl ( net , cmd , ( void __user * ) arg ) ;
break ;
case SIOCGIFADDR :
case SIOCSIFADDR :
case SIOCGIFBRDADDR :
case SIOCSIFBRDADDR :
case SIOCGIFNETMASK :
case SIOCSIFNETMASK :
case SIOCGIFDSTADDR :
case SIOCSIFDSTADDR :
case SIOCSIFPFLAGS :
case SIOCGIFPFLAGS :
case SIOCSIFFLAGS :
err = devinet_ioctl ( net , cmd , ( void __user * ) arg ) ;
break ;
default :
if ( sk - > sk_prot - > ioctl )
err = sk - > sk_prot - > ioctl ( sk , cmd , arg ) ;
else
err = - ENOIOCTLCMD ;
break ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_ioctl ) ;
2005-04-17 02:20:36 +04:00
2005-12-22 23:49:22 +03:00
/*
 * Socket-layer operations for SOCK_STREAM (TCP) sockets; installed via the
 * SOCK_STREAM entry of inetsw_array[].
 */
const struct proto_ops inet_stream_ops = {
2006-03-21 09:48:35 +03:00
. family = PF_INET ,
. owner = THIS_MODULE ,
. release = inet_release ,
. bind = inet_bind ,
. connect = inet_stream_connect ,
. socketpair = sock_no_socketpair ,
. accept = inet_accept ,
. getname = inet_getname ,
. poll = tcp_poll ,
. ioctl = inet_ioctl ,
. listen = inet_listen ,
. shutdown = inet_shutdown ,
. setsockopt = sock_common_setsockopt ,
. getsockopt = sock_common_getsockopt ,
2007-08-03 06:23:56 +04:00
/* Wired directly to TCP, so the auto-bind step of inet_sendmsg() is not used. */
. sendmsg = tcp_sendmsg ,
2006-03-21 09:48:35 +03:00
. recvmsg = sock_common_recvmsg ,
. mmap = sock_no_mmap ,
/* Likewise bypasses inet_sendpage(). */
. sendpage = tcp_sendpage ,
2007-11-07 10:30:13 +03:00
. splice_read = tcp_splice_read ,
2006-03-21 09:45:21 +03:00
# ifdef CONFIG_COMPAT
2006-03-21 09:48:35 +03:00
. compat_setsockopt = compat_sock_common_setsockopt ,
. compat_getsockopt = compat_sock_common_getsockopt ,
2006-03-21 09:45:21 +03:00
# endif
2005-04-17 02:20:36 +04:00
} ;
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_stream_ops ) ;
2005-04-17 02:20:36 +04:00
2005-12-22 23:49:22 +03:00
/*
 * Socket-layer operations for SOCK_DGRAM (UDP) sockets; installed via the
 * SOCK_DGRAM entry of inetsw_array[].  Connection-oriented calls (accept,
 * listen) are wired to the sock_no_* stubs.
 */
const struct proto_ops inet_dgram_ops = {
2006-03-21 09:48:35 +03:00
. family = PF_INET ,
. owner = THIS_MODULE ,
. release = inet_release ,
. bind = inet_bind ,
. connect = inet_dgram_connect ,
. socketpair = sock_no_socketpair ,
. accept = sock_no_accept ,
. getname = inet_getname ,
. poll = udp_poll ,
. ioctl = inet_ioctl ,
. listen = sock_no_listen ,
. shutdown = inet_shutdown ,
. setsockopt = sock_common_setsockopt ,
. getsockopt = sock_common_getsockopt ,
/* inet_sendmsg()/inet_sendpage() auto-bind an unbound socket before sending. */
. sendmsg = inet_sendmsg ,
. recvmsg = sock_common_recvmsg ,
. mmap = sock_no_mmap ,
. sendpage = inet_sendpage ,
2006-03-21 09:45:21 +03:00
# ifdef CONFIG_COMPAT
2006-03-21 09:48:35 +03:00
. compat_setsockopt = compat_sock_common_setsockopt ,
. compat_getsockopt = compat_sock_common_getsockopt ,
2006-03-21 09:45:21 +03:00
# endif
2005-04-17 02:20:36 +04:00
} ;
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_dgram_ops ) ;
2005-04-17 02:20:36 +04:00
/*
* For SOCK_RAW sockets ; should be the same as inet_dgram_ops but without
* udp_poll
*/
2005-12-22 23:49:22 +03:00
/* Operations for SOCK_RAW sockets; installed via the SOCK_RAW entry of
 * inetsw_array[].  Differs from inet_dgram_ops only in the poll handler.
 */
static const struct proto_ops inet_sockraw_ops = {
2006-03-21 09:48:35 +03:00
. family = PF_INET ,
. owner = THIS_MODULE ,
. release = inet_release ,
. bind = inet_bind ,
. connect = inet_dgram_connect ,
. socketpair = sock_no_socketpair ,
. accept = sock_no_accept ,
. getname = inet_getname ,
/* Generic datagram poll instead of udp_poll. */
. poll = datagram_poll ,
. ioctl = inet_ioctl ,
. listen = sock_no_listen ,
. shutdown = inet_shutdown ,
. setsockopt = sock_common_setsockopt ,
. getsockopt = sock_common_getsockopt ,
. sendmsg = inet_sendmsg ,
. recvmsg = sock_common_recvmsg ,
. mmap = sock_no_mmap ,
. sendpage = inet_sendpage ,
2006-03-21 09:45:21 +03:00
# ifdef CONFIG_COMPAT
2006-03-21 09:48:35 +03:00
. compat_setsockopt = compat_sock_common_setsockopt ,
. compat_getsockopt = compat_sock_common_getsockopt ,
2006-03-21 09:45:21 +03:00
# endif
2005-04-17 02:20:36 +04:00
} ;
2009-10-05 09:58:39 +04:00
/* Address-family descriptor for PF_INET: new sockets of this family are
 * built by inet_create().
 */
static const struct net_proto_family inet_family_ops = {
2005-04-17 02:20:36 +04:00
. family = PF_INET ,
. create = inet_create ,
. owner = THIS_MODULE ,
} ;
/* Upon startup we insert all the elements in inetsw_array[] into
* the linked list inetsw .
*/
/*
 * Built-in type/protocol pairs.  Field notes, as used elsewhere in this file:
 *  - capability: inet_create() only calls capable() when this is > 0, so
 *    -1 means no capability is required.
 *  - INET_PROTOSW_PERMANENT: the entry cannot be overridden by
 *    inet_register_protosw() nor removed by inet_unregister_protosw().
 *  - INET_PROTOSW_ICSK: inet_create() sets inet->is_icsk.
 *  - INET_PROTOSW_REUSE: inet_create() sets sk->sk_reuse.
 */
static struct inet_protosw inetsw_array [ ] =
{
2007-02-09 17:24:47 +03:00
/* TCP: stream sockets. */
{
. type = SOCK_STREAM ,
. protocol = IPPROTO_TCP ,
. prot = & tcp_prot ,
. ops = & inet_stream_ops ,
. capability = - 1 ,
. no_check = 0 ,
. flags = INET_PROTOSW_PERMANENT |
2005-12-14 10:26:10 +03:00
INET_PROTOSW_ICSK ,
2007-02-09 17:24:47 +03:00
} ,
/* UDP: datagram sockets. */
{
. type = SOCK_DGRAM ,
. protocol = IPPROTO_UDP ,
. prot = & udp_prot ,
. ops = & inet_dgram_ops ,
. capability = - 1 ,
. no_check = UDP_CSUM_DEFAULT ,
. flags = INET_PROTOSW_PERMANENT ,
2005-04-17 02:20:36 +04:00
} ,
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
/* Raw IP: matches any protocol number and requires CAP_NET_RAW. */
{
2007-02-09 17:24:47 +03:00
. type = SOCK_RAW ,
. protocol = IPPROTO_IP , /* wild card */
. prot = & raw_prot ,
. ops = & inet_sockraw_ops ,
. capability = CAP_NET_RAW ,
. no_check = UDP_CSUM_DEFAULT ,
. flags = INET_PROTOSW_REUSE ,
2005-04-17 02:20:36 +04:00
}
} ;
2007-09-17 03:39:25 +04:00
/* Number of built-in entries in inetsw_array[]. */
# define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)
2005-04-17 02:20:36 +04:00
void inet_register_protosw ( struct inet_protosw * p )
{
struct list_head * lh ;
struct inet_protosw * answer ;
int protocol = p - > protocol ;
struct list_head * last_perm ;
spin_lock_bh ( & inetsw_lock ) ;
if ( p - > type > = SOCK_MAX )
goto out_illegal ;
/* If we are trying to override a permanent protocol, bail. */
answer = NULL ;
last_perm = & inetsw [ p - > type ] ;
list_for_each ( lh , & inetsw [ p - > type ] ) {
answer = list_entry ( lh , struct inet_protosw , list ) ;
/* Check only the non-wild match. */
if ( INET_PROTOSW_PERMANENT & answer - > flags ) {
if ( protocol = = answer - > protocol )
break ;
last_perm = lh ;
}
answer = NULL ;
}
if ( answer )
goto out_permanent ;
/* Add the new entry after the last permanent entry if any, so that
* the new entry does not override a permanent entry when matched with
* a wild - card protocol . But it is allowed to override any existing
2007-02-09 17:24:47 +03:00
* non - permanent entry . This means that when we remove this entry , the
2005-04-17 02:20:36 +04:00
* system automatically returns to the old behavior .
*/
list_add_rcu ( & p - > list , last_perm ) ;
out :
spin_unlock_bh ( & inetsw_lock ) ;
return ;
out_permanent :
printk ( KERN_ERR " Attempt to override permanent protocol %d. \n " ,
protocol ) ;
goto out ;
out_illegal :
printk ( KERN_ERR
" Ignoring attempt to register invalid socket type %d. \n " ,
p - > type ) ;
goto out ;
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_register_protosw ) ;
2005-04-17 02:20:36 +04:00
void inet_unregister_protosw ( struct inet_protosw * p )
{
if ( INET_PROTOSW_PERMANENT & p - > flags ) {
printk ( KERN_ERR
" Attempt to unregister permanent protocol %d. \n " ,
p - > protocol ) ;
} else {
spin_lock_bh ( & inetsw_lock ) ;
list_del_rcu ( & p - > list ) ;
spin_unlock_bh ( & inetsw_lock ) ;
synchronize_net ( ) ;
}
}
2009-08-29 10:45:21 +04:00
EXPORT_SYMBOL ( inet_unregister_protosw ) ;
2005-04-17 02:20:36 +04:00
2005-08-10 06:50:02 +04:00
/*
* Shall we try to damage output packets if routing dev changes ?
*/
2006-09-23 01:15:41 +04:00
/* A value above 1 additionally makes inet_sk_reselect_saddr() log every
 * source address change.
 */
int sysctl_ip_dynaddr __read_mostly ;
2005-08-10 06:50:02 +04:00
static int inet_sk_reselect_saddr ( struct sock * sk )
{
struct inet_sock * inet = inet_sk ( sk ) ;
int err ;
struct rtable * rt ;
2006-11-15 07:51:49 +03:00
__be32 old_saddr = inet - > saddr ;
__be32 new_saddr ;
2006-09-27 08:27:15 +04:00
__be32 daddr = inet - > daddr ;
2005-08-10 06:50:02 +04:00
if ( inet - > opt & & inet - > opt - > srr )
daddr = inet - > opt - > faddr ;
/* Query new route. */
err = ip_route_connect ( & rt , daddr , 0 ,
RT_CONN_FLAGS ( sk ) ,
sk - > sk_bound_dev_if ,
sk - > sk_protocol ,
2007-02-08 13:09:21 +03:00
inet - > sport , inet - > dport , sk , 0 ) ;
2005-08-10 06:50:02 +04:00
if ( err )
return err ;
sk_setup_caps ( sk , & rt - > u . dst ) ;
new_saddr = rt - > rt_src ;
if ( new_saddr = = old_saddr )
return 0 ;
if ( sysctl_ip_dynaddr > 1 ) {
2008-10-31 10:53:57 +03:00
printk ( KERN_INFO " %s(): shifting inet->saddr from %pI4 to %pI4 \n " ,
__func__ , & old_saddr , & new_saddr ) ;
2005-08-10 06:50:02 +04:00
}
inet - > saddr = inet - > rcv_saddr = new_saddr ;
/*
* XXX The only one ugly spot where we need to
* XXX really change the sockets identity after
* XXX it has entered the hashes . - DaveM
*
* Besides that , it does not check for connection
* uniqueness . Wait for troubles .
*/
__sk_prot_rehash ( sk ) ;
return 0 ;
}
int inet_sk_rebuild_header ( struct sock * sk )
{
struct inet_sock * inet = inet_sk ( sk ) ;
struct rtable * rt = ( struct rtable * ) __sk_dst_check ( sk , 0 ) ;
2006-09-28 05:28:07 +04:00
__be32 daddr ;
2005-08-10 06:50:02 +04:00
int err ;
/* Route is OK, nothing to do. */
if ( rt )
return 0 ;
/* Reroute. */
daddr = inet - > daddr ;
if ( inet - > opt & & inet - > opt - > srr )
daddr = inet - > opt - > faddr ;
{
struct flowi fl = {
. oif = sk - > sk_bound_dev_if ,
2009-10-02 02:16:49 +04:00
. mark = sk - > sk_mark ,
2005-08-10 06:50:02 +04:00
. nl_u = {
. ip4_u = {
. daddr = daddr ,
. saddr = inet - > saddr ,
. tos = RT_CONN_FLAGS ( sk ) ,
} ,
} ,
. proto = sk - > sk_protocol ,
2008-11-20 12:08:06 +03:00
. flags = inet_sk_flowi_flags ( sk ) ,
2005-08-10 06:50:02 +04:00
. uli_u = {
. ports = {
. sport = inet - > sport ,
. dport = inet - > dport ,
} ,
} ,
} ;
2007-02-09 17:24:47 +03:00
2006-08-05 10:12:42 +04:00
security_sk_classify_flow ( sk , & fl ) ;
2008-03-25 20:26:21 +03:00
err = ip_route_output_flow ( sock_net ( sk ) , & rt , & fl , sk , 0 ) ;
2005-08-10 06:50:02 +04:00
}
if ( ! err )
sk_setup_caps ( sk , & rt - > u . dst ) ;
else {
/* Routing failed... */
sk - > sk_route_caps = 0 ;
/*
* Other protocols have to map its equivalent state to TCP_SYN_SENT .
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT . - acme
*/
if ( ! sysctl_ip_dynaddr | |
sk - > sk_state ! = TCP_SYN_SENT | |
( sk - > sk_userlocks & SOCK_BINDADDR_LOCK ) | |
( err = inet_sk_reselect_saddr ( sk ) ) ! = 0 )
sk - > sk_err_soft = - err ;
}
return err ;
}
EXPORT_SYMBOL ( inet_sk_rebuild_header ) ;
2006-07-09 00:34:56 +04:00
static int inet_gso_send_check ( struct sk_buff * skb )
{
struct iphdr * iph ;
2009-09-14 16:21:47 +04:00
const struct net_protocol * ops ;
2006-07-09 00:34:56 +04:00
int proto ;
int ihl ;
int err = - EINVAL ;
if ( unlikely ( ! pskb_may_pull ( skb , sizeof ( * iph ) ) ) )
goto out ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2006-07-09 00:34:56 +04:00
ihl = iph - > ihl * 4 ;
if ( ihl < sizeof ( * iph ) )
goto out ;
if ( unlikely ( ! pskb_may_pull ( skb , ihl ) ) )
goto out ;
2007-03-13 19:06:52 +03:00
__skb_pull ( skb , ihl ) ;
skb_reset_transport_header ( skb ) ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2006-07-09 00:34:56 +04:00
proto = iph - > protocol & ( MAX_INET_PROTOS - 1 ) ;
err = - EPROTONOSUPPORT ;
rcu_read_lock ( ) ;
ops = rcu_dereference ( inet_protos [ proto ] ) ;
if ( likely ( ops & & ops - > gso_send_check ) )
err = ops - > gso_send_check ( skb ) ;
rcu_read_unlock ( ) ;
out :
return err ;
}
2006-06-28 00:22:38 +04:00
static struct sk_buff * inet_gso_segment ( struct sk_buff * skb , int features )
2006-06-22 14:02:40 +04:00
{
struct sk_buff * segs = ERR_PTR ( - EINVAL ) ;
struct iphdr * iph ;
2009-09-14 16:21:47 +04:00
const struct net_protocol * ops ;
2006-06-22 14:02:40 +04:00
int proto ;
int ihl ;
int id ;
2009-07-09 12:09:47 +04:00
unsigned int offset = 0 ;
2006-06-22 14:02:40 +04:00
2007-06-27 11:47:37 +04:00
if ( ! ( features & NETIF_F_V4_CSUM ) )
features & = ~ NETIF_F_SG ;
2006-07-04 06:38:35 +04:00
if ( unlikely ( skb_shinfo ( skb ) - > gso_type &
~ ( SKB_GSO_TCPV4 |
SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
0 ) ) )
goto out ;
if ( unlikely ( ! pskb_may_pull ( skb , sizeof ( * iph ) ) ) )
2006-06-22 14:02:40 +04:00
goto out ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2006-06-22 14:02:40 +04:00
ihl = iph - > ihl * 4 ;
if ( ihl < sizeof ( * iph ) )
goto out ;
2006-07-04 06:38:35 +04:00
if ( unlikely ( ! pskb_may_pull ( skb , ihl ) ) )
2006-06-22 14:02:40 +04:00
goto out ;
2007-03-13 19:06:52 +03:00
__skb_pull ( skb , ihl ) ;
skb_reset_transport_header ( skb ) ;
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2006-06-22 14:02:40 +04:00
id = ntohs ( iph - > id ) ;
proto = iph - > protocol & ( MAX_INET_PROTOS - 1 ) ;
segs = ERR_PTR ( - EPROTONOSUPPORT ) ;
rcu_read_lock ( ) ;
ops = rcu_dereference ( inet_protos [ proto ] ) ;
2006-07-04 06:38:35 +04:00
if ( likely ( ops & & ops - > gso_segment ) )
2006-06-28 00:22:38 +04:00
segs = ops - > gso_segment ( skb , features ) ;
2006-06-22 14:02:40 +04:00
rcu_read_unlock ( ) ;
2008-04-29 12:03:09 +04:00
if ( ! segs | | IS_ERR ( segs ) )
2006-06-22 14:02:40 +04:00
goto out ;
skb = segs ;
do {
2007-04-21 09:47:35 +04:00
iph = ip_hdr ( skb ) ;
2009-07-09 12:09:47 +04:00
if ( proto = = IPPROTO_UDP ) {
iph - > id = htons ( id ) ;
iph - > frag_off = htons ( offset > > 3 ) ;
if ( skb - > next ! = NULL )
iph - > frag_off | = htons ( IP_MF ) ;
offset + = ( skb - > len - skb - > mac_len - iph - > ihl * 4 ) ;
} else
iph - > id = htons ( id + + ) ;
2006-06-22 14:02:40 +04:00
iph - > tot_len = htons ( skb - > len - skb - > mac_len ) ;
iph - > check = 0 ;
2007-04-11 07:50:43 +04:00
iph - > check = ip_fast_csum ( skb_network_header ( skb ) , iph - > ihl ) ;
2006-06-22 14:02:40 +04:00
} while ( ( skb = skb - > next ) ) ;
out :
return segs ;
}
2008-12-16 10:41:09 +03:00
/*
 * GRO receive hook for IPv4 packets.
 *
 * Checks the IP header of skb, compares it against every packet on the
 * *head list that is still marked same_flow, updates the same_flow and
 * flush markers accordingly, and finally passes skb to the gro_receive
 * hook of the protocol found in inet_protos[].
 *
 * Returns whatever the protocol hook returns, or NULL when the header
 * could not be read, no hook is registered, or the header is rejected.
 * In every case NAPI_GRO_CB(skb)->flush is OR-ed with the local flush
 * value, which starts out as 1 and is only recomputed once the header
 * has passed the checks below.
 */
static struct sk_buff * * inet_gro_receive ( struct sk_buff * * head ,
struct sk_buff * skb )
{
2009-09-14 16:21:47 +04:00
const struct net_protocol * ops ;
2008-12-16 10:41:09 +03:00
struct sk_buff * * pp = NULL ;
struct sk_buff * p ;
struct iphdr * iph ;
2009-05-26 22:50:28 +04:00
unsigned int hlen ;
unsigned int off ;
2009-05-26 22:50:29 +04:00
unsigned int id ;
2008-12-16 10:41:09 +03:00
int flush = 1 ;
int proto ;
2009-05-26 22:50:28 +04:00
/*
 * Locate the IP header at the current GRO offset.  The slow accessor is
 * used when skb_gro_header_hard() reports that hlen bytes are not
 * directly available (presumably not in the linear/fast area - confirm
 * against the skb_gro_header_*() helpers).
 */
off = skb_gro_offset ( skb ) ;
hlen = off + sizeof ( * iph ) ;
iph = skb_gro_header_fast ( skb , off ) ;
if ( skb_gro_header_hard ( skb , hlen ) ) {
iph = skb_gro_header_slow ( skb , hlen , off ) ;
if ( unlikely ( ! iph ) )
goto out ;
}
2008-12-16 10:41:09 +03:00
proto = iph - > protocol & ( MAX_INET_PROTOS - 1 ) ;
rcu_read_lock ( ) ;
ops = rcu_dereference ( inet_protos [ proto ] ) ;
/* Without a protocol-level gro_receive hook, leave with flush == 1. */
if ( ! ops | | ! ops - > gro_receive )
goto out_unlock ;
2009-02-08 21:00:39 +03:00
/*
 * Only a first header byte of 0x45 is accepted (presumably IP version 4
 * with a 5-word header, i.e. no IP options - confirm against struct iphdr).
 */
if ( * ( u8 * ) iph ! = 0x45 )
2008-12-16 10:41:09 +03:00
goto out_unlock ;
/* A non-zero ip_fast_csum() result is treated as a bad header. */
if ( unlikely ( ip_fast_csum ( ( u8 * ) iph , iph - > ihl ) ) )
goto out_unlock ;
2009-05-26 22:50:29 +04:00
/*
 * Load the 32-bit word starting at iph->id.  After the shift further
 * down, id holds only its upper 16 bits; the lower 16 bits (presumably
 * frag_off - confirm against struct iphdr layout) are compared with
 * IP_DF here.  flush becomes non-zero if those bits differ from IP_DF
 * or if the low 16 bits of the first header word (presumably tot_len)
 * differ from skb_gro_len(skb).
 */
id = ntohl ( * ( u32 * ) & iph - > id ) ;
flush = ( u16 ) ( ( ntohl ( * ( u32 * ) iph ) ^ skb_gro_len ( skb ) ) | ( id ^ IP_DF ) ) ;
id > > = 16 ;
2008-12-16 10:41:09 +03:00
/* Compare against every held packet that is still a flow candidate. */
for ( p = * head ; p ; p = p - > next ) {
struct iphdr * iph2 ;
if ( ! NAPI_GRO_CB ( p ) - > same_flow )
continue ;
iph2 = ip_hdr ( p ) ;
2009-02-08 21:00:39 +03:00
/* Any difference in protocol, tos or addresses means a different flow. */
if ( ( iph - > protocol ^ iph2 - > protocol ) |
( iph - > tos ^ iph2 - > tos ) |
( iph - > saddr ^ iph2 - > saddr ) |
( iph - > daddr ^ iph2 - > daddr ) ) {
2008-12-16 10:41:09 +03:00
NAPI_GRO_CB ( p ) - > same_flow = 0 ;
continue ;
}
/* All fields must match except length and checksum. */
/*
 * Same flow: mark p for flushing when the ttl differs or when this
 * packet's id is not p's id plus p's count (truncated to 16 bits).
 */
NAPI_GRO_CB ( p ) - > flush | =
2009-02-08 21:00:39 +03:00
( iph - > ttl ^ iph2 - > ttl ) |
( ( u16 ) ( ntohs ( iph2 - > id ) + NAPI_GRO_CB ( p ) - > count ) ^ id ) ;
2008-12-16 10:41:09 +03:00
NAPI_GRO_CB ( p ) - > flush | = flush ;
}
NAPI_GRO_CB ( skb ) - > flush | = flush ;
2009-01-29 17:19:50 +03:00
/* Step over the IP header and record where the transport header starts. */
skb_gro_pull ( skb , sizeof ( * iph ) ) ;
skb_set_transport_header ( skb , skb_gro_offset ( skb ) ) ;
2008-12-16 10:41:09 +03:00
pp = ops - > gro_receive ( head , skb ) ;
out_unlock :
rcu_read_unlock ( ) ;
out :
/* Reached on every path, so early exits propagate flush == 1 to skb. */
NAPI_GRO_CB ( skb ) - > flush | = flush ;
return pp ;
}
static int inet_gro_complete ( struct sk_buff * skb )
{
2009-09-14 16:21:47 +04:00
const struct net_protocol * ops ;
2008-12-16 10:41:09 +03:00
struct iphdr * iph = ip_hdr ( skb ) ;
int proto = iph - > protocol & ( MAX_INET_PROTOS - 1 ) ;
int err = - ENOSYS ;
__be16 newlen = htons ( skb - > len - skb_network_offset ( skb ) ) ;
csum_replace2 ( & iph - > check , iph - > tot_len , newlen ) ;
iph - > tot_len = newlen ;
rcu_read_lock ( ) ;
ops = rcu_dereference ( inet_protos [ proto ] ) ;
if ( WARN_ON ( ! ops | | ! ops - > gro_complete ) )
goto out_unlock ;
err = ops - > gro_complete ( skb ) ;
out_unlock :
rcu_read_unlock ( ) ;
return err ;
}
2008-04-04 01:27:58 +04:00
int inet_ctl_sock_create ( struct sock * * sk , unsigned short family ,
2008-04-04 01:28:30 +04:00
unsigned short type , unsigned char protocol ,
struct net * net )
2008-04-04 01:22:32 +04:00
{
2008-04-04 01:27:58 +04:00
struct socket * sock ;
int rc = sock_create_kern ( family , type , protocol , & sock ) ;
2008-04-04 01:22:32 +04:00
if ( rc = = 0 ) {
2008-04-04 01:27:58 +04:00
* sk = sock - > sk ;
( * sk ) - > sk_allocation = GFP_ATOMIC ;
2008-04-04 01:22:32 +04:00
/*
* Unhash it so that IP input processing does not even see it ,
* we do not wish this socket to see incoming packets .
*/
2008-04-04 01:27:58 +04:00
( * sk ) - > sk_prot - > unhash ( * sk ) ;
2008-04-04 01:28:30 +04:00
sk_change_net ( * sk , net ) ;
2008-04-04 01:22:32 +04:00
}
return rc ;
}
EXPORT_SYMBOL_GPL ( inet_ctl_sock_create ) ;
2007-04-25 08:53:35 +04:00
/*
 * Sum one counter over all possible CPUs.
 *
 * @mib:  the two per-cpu blocks (mib[0] and mib[1]) holding the counters
 * @offt: index of the counter, in units of unsigned long, within a block
 *
 * Returns the total of that counter across both blocks on every
 * possible CPU.
 */
unsigned long snmp_fold_field(void *mib[], int offt)
{
	unsigned long total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		total += ((unsigned long *)per_cpu_ptr(mib[0], cpu))[offt];
		total += ((unsigned long *)per_cpu_ptr(mib[1], cpu))[offt];
	}

	return total;
}
EXPORT_SYMBOL_GPL ( snmp_fold_field ) ;
2008-01-24 09:31:45 +03:00
/*
 * Allocate the two per-cpu blocks of a MIB.
 *
 * @ptr:     receives the two allocations in ptr[0] and ptr[1]; must not
 *           be NULL (BUG_ON)
 * @mibsize: size in bytes of each per-cpu block
 *
 * Both blocks are requested with the alignment of unsigned long long.
 * Returns 0 on success.  Returns -ENOMEM if either allocation fails; in
 * that case nothing stays allocated and ptr[0] is NULL.
 */
int snmp_mib_init(void *ptr[2], size_t mibsize)
{
	BUG_ON(ptr == NULL);

	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
	if (!ptr[0])
		return -ENOMEM;

	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
	if (!ptr[1]) {
		/* Roll back the first block so nothing is left half set up. */
		free_percpu(ptr[0]);
		ptr[0] = NULL;
		return -ENOMEM;
	}

	return 0;
}
EXPORT_SYMBOL_GPL ( snmp_mib_init ) ;
/*
 * Release both per-cpu blocks of a MIB and clear the stored pointers.
 *
 * @ptr: the pair filled in by snmp_mib_init(); must not be NULL (BUG_ON)
 */
void snmp_mib_free(void *ptr[2])
{
	BUG_ON(ptr == NULL);

	free_percpu(ptr[0]);
	free_percpu(ptr[1]);

	ptr[1] = NULL;
	ptr[0] = NULL;
}
EXPORT_SYMBOL_GPL ( snmp_mib_free ) ;
2005-04-17 02:20:36 +04:00
# ifdef CONFIG_IP_MULTICAST
2009-09-14 16:21:47 +04:00
static const struct net_protocol igmp_protocol = {
2005-04-17 02:20:36 +04:00
. handler = igmp_rcv ,
2008-12-26 03:42:23 +03:00
. netns_ok = 1 ,
2005-04-17 02:20:36 +04:00
} ;
# endif
2009-09-14 16:21:47 +04:00
static const struct net_protocol tcp_protocol = {
2005-04-17 02:20:36 +04:00
. handler = tcp_v4_rcv ,
. err_handler = tcp_v4_err ,
2006-07-09 00:34:56 +04:00
. gso_send_check = tcp_v4_gso_send_check ,
2006-06-22 14:02:40 +04:00
. gso_segment = tcp_tso_segment ,
2008-12-16 10:43:36 +03:00
. gro_receive = tcp4_gro_receive ,
. gro_complete = tcp4_gro_complete ,
2005-04-17 02:20:36 +04:00
. no_policy = 1 ,
2008-03-25 01:34:06 +03:00
. netns_ok = 1 ,
2005-04-17 02:20:36 +04:00
} ;
2009-09-14 16:21:47 +04:00
static const struct net_protocol udp_protocol = {
2005-04-17 02:20:36 +04:00
. handler = udp_rcv ,
. err_handler = udp_err ,
2009-07-09 12:09:47 +04:00
. gso_send_check = udp4_ufo_send_check ,
. gso_segment = udp4_ufo_fragment ,
2005-04-17 02:20:36 +04:00
. no_policy = 1 ,
2008-03-25 01:34:06 +03:00
. netns_ok = 1 ,
2005-04-17 02:20:36 +04:00
} ;
2009-09-14 16:21:47 +04:00
static const struct net_protocol icmp_protocol = {
2005-04-17 02:20:36 +04:00
. handler = icmp_rcv ,
2007-12-12 21:44:43 +03:00
. no_policy = 1 ,
2008-03-25 01:34:06 +03:00
. netns_ok = 1 ,
2005-04-17 02:20:36 +04:00
} ;
2008-07-18 15:01:44 +04:00
static __net_init int ipv4_mib_init_net ( struct net * net )
{
2008-07-18 15:02:08 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . tcp_statistics ,
sizeof ( struct tcp_mib ) ) < 0 )
goto err_tcp_mib ;
2008-07-18 15:02:42 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . ip_statistics ,
sizeof ( struct ipstats_mib ) ) < 0 )
goto err_ip_mib ;
2008-07-18 15:03:08 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . net_statistics ,
sizeof ( struct linux_mib ) ) < 0 )
goto err_net_mib ;
2008-07-18 15:03:27 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . udp_statistics ,
sizeof ( struct udp_mib ) ) < 0 )
goto err_udp_mib ;
2008-07-18 15:03:45 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . udplite_statistics ,
sizeof ( struct udp_mib ) ) < 0 )
goto err_udplite_mib ;
2008-07-18 15:04:02 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . icmp_statistics ,
sizeof ( struct icmp_mib ) ) < 0 )
goto err_icmp_mib ;
2008-07-18 15:04:22 +04:00
if ( snmp_mib_init ( ( void * * ) net - > mib . icmpmsg_statistics ,
sizeof ( struct icmpmsg_mib ) ) < 0 )
goto err_icmpmsg_mib ;
2008-07-18 15:02:08 +04:00
tcp_mib_init ( net ) ;
2008-07-18 15:01:44 +04:00
return 0 ;
2008-07-18 15:02:08 +04:00
2008-07-18 15:04:22 +04:00
err_icmpmsg_mib :
snmp_mib_free ( ( void * * ) net - > mib . icmp_statistics ) ;
2008-07-18 15:04:02 +04:00
err_icmp_mib :
snmp_mib_free ( ( void * * ) net - > mib . udplite_statistics ) ;
2008-07-18 15:03:45 +04:00
err_udplite_mib :
snmp_mib_free ( ( void * * ) net - > mib . udp_statistics ) ;
2008-07-18 15:03:27 +04:00
err_udp_mib :
snmp_mib_free ( ( void * * ) net - > mib . net_statistics ) ;
2008-07-18 15:03:08 +04:00
err_net_mib :
snmp_mib_free ( ( void * * ) net - > mib . ip_statistics ) ;
2008-07-18 15:02:42 +04:00
err_ip_mib :
snmp_mib_free ( ( void * * ) net - > mib . tcp_statistics ) ;
2008-07-18 15:02:08 +04:00
err_tcp_mib :
return - ENOMEM ;
2008-07-18 15:01:44 +04:00
}
static __net_exit void ipv4_mib_exit_net ( struct net * net )
{
2008-07-18 15:04:22 +04:00
snmp_mib_free ( ( void * * ) net - > mib . icmpmsg_statistics ) ;
2008-07-18 15:04:02 +04:00
snmp_mib_free ( ( void * * ) net - > mib . icmp_statistics ) ;
2008-07-18 15:03:45 +04:00
snmp_mib_free ( ( void * * ) net - > mib . udplite_statistics ) ;
2008-07-18 15:03:27 +04:00
snmp_mib_free ( ( void * * ) net - > mib . udp_statistics ) ;
2008-07-18 15:03:08 +04:00
snmp_mib_free ( ( void * * ) net - > mib . net_statistics ) ;
2008-07-18 15:02:42 +04:00
snmp_mib_free ( ( void * * ) net - > mib . ip_statistics ) ;
2008-07-18 15:02:08 +04:00
snmp_mib_free ( ( void * * ) net - > mib . tcp_statistics ) ;
2008-07-18 15:01:44 +04:00
}
/* Per-namespace hooks that set up and tear down the IPv4 MIBs. */
static __net_initdata struct pernet_operations ipv4_mib_ops = {
	.init	= ipv4_mib_init_net,
	.exit	= ipv4_mib_exit_net,
};
2005-04-17 02:20:36 +04:00
/*
 * Register the IPv4 MIB per-namespace operations.  Returns the result of
 * register_pernet_subsys().
 */
static int __init init_ipv4_mibs(void)
{
	int rc = register_pernet_subsys(&ipv4_mib_ops);

	return rc;
}
static int ipv4_proc_init ( void ) ;
2005-07-06 01:40:10 +04:00
/*
* IP protocol layer initialiser
*/
2009-03-09 11:18:29 +03:00
/*
 * Packet type for ETH_P_IP frames: ip_rcv() is the receive function and
 * the inet_gso_*()/inet_gro_*() functions of this file are the
 * offload hooks.  Registered by inet_init() through dev_add_pack().
 */
static struct packet_type ip_packet_type __read_mostly = {
	.type		= cpu_to_be16(ETH_P_IP),
	.func		= ip_rcv,
	.gso_send_check	= inet_gso_send_check,
	.gso_segment	= inet_gso_segment,
	.gro_receive	= inet_gro_receive,
	.gro_complete	= inet_gro_complete,
};
2005-04-17 02:20:36 +04:00
/*
 * Boot-time initialisation of the IPv4 stack.
 *
 * Registers the tcp, udp and raw protos (the only steps whose failure
 * aborts initialisation and is returned to the caller), then registers
 * the socket family, the base network protocols and the inetsw entries,
 * and finally initialises the individual subsystems in a fixed order
 * before adding the IP packet type.  Failures of the later steps are
 * only logged, except icmp_init(), whose failure panics.
 *
 * Returns 0 on success or the proto_register() error.
 */
static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;
	unsigned int i;
	int rc;

	/* inet_skb_parm must fit into the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		return rc;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */
	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT " inet_init: Cannot add ICMP protocol \n ");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT " inet_init: Cannot add UDP protocol \n ");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT " inet_init: Cannot add TCP protocol \n ");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT " inet_init: Cannot add IGMP protocol \n ");
#endif

	/* Register the socket-side information for inet_create. */
	for (i = 0; i < SOCK_MAX; i++)
		INIT_LIST_HEAD(&inetsw[i]);

	for (i = 0; i < INETSW_ARRAY_LEN; i++)
		inet_register_protosw(&inetsw_array[i]);

	/*
	 *	Set the ARP module up
	 */
	arp_init();

	/*
	 *	Set the IP module up
	 */
	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */
	if (icmp_init() < 0)
		panic(" Failed to create the ICMP control socket. \n ");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT " inet_init: Cannot init ipv4 mroute \n ");
#endif

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */
	if (init_ipv4_mibs())
		printk(KERN_CRIT " inet_init: Cannot init ipv4 mibs \n ");

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	return 0;

	/* Unwind the proto registrations in reverse order. */
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	return rc;
}
2006-04-29 02:19:17 +04:00
fs_initcall ( inet_init ) ;
2005-04-17 02:20:36 +04:00
/* ------------------------------------------------------------------------ */
# ifdef CONFIG_PROC_FS
/*
 * Create the IPv4 /proc entries: raw, tcp4, udp4 and the misc entries,
 * in that order.  If one step fails, the earlier ones are undone in
 * reverse order and -ENOMEM is returned; otherwise returns 0.
 */
static int __init ipv4_proc_init(void)
{
	if (raw_proc_init())
		goto fail;
	if (tcp4_proc_init())
		goto undo_raw;
	if (udp4_proc_init())
		goto undo_tcp;
	if (ip_misc_proc_init())
		goto undo_udp;

	return 0;

undo_udp:
	udp4_proc_exit();
undo_tcp:
	tcp4_proc_exit();
undo_raw:
	raw_proc_exit();
fail:
	return -ENOMEM;
}
# else /* CONFIG_PROC_FS */
/* Without CONFIG_PROC_FS there is nothing to create; always succeeds. */
static int __init ipv4_proc_init(void)
{
	const int rc = 0;

	return rc;
}
# endif /* CONFIG_PROC_FS */
MODULE_ALIAS_NETPROTO ( PF_INET ) ;