2005-04-17 02:20:36 +04:00
/*
* INET An implementation of the TCP / IP protocol suite for the LINUX
* operating system . INET is implemented using the BSD Socket
* interface as the means of communication with the user level .
*
* The User Datagram Protocol ( UDP ) .
*
2005-05-06 03:16:16 +04:00
* Authors : Ross Biro
2005-04-17 02:20:36 +04:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
* Arnt Gulbrandsen , < agulbra @ nvg . unit . no >
* Alan Cox , < Alan . Cox @ linux . org >
* Hirokazu Takahashi , < taka @ valinux . co . jp >
*
* Fixes :
* Alan Cox : verify_area ( ) calls
* Alan Cox : stopped close while in use off icmp
* messages . Not a fix but a botch that
* for udp at least is ' valid ' .
* Alan Cox : Fixed icmp handling properly
* Alan Cox : Correct error for oversized datagrams
2007-02-09 17:24:47 +03:00
* Alan Cox : Tidied select ( ) semantics .
* Alan Cox : udp_err ( ) fixed properly , also now
2005-04-17 02:20:36 +04:00
* select and read wake correctly on errors
* Alan Cox : udp_send verify_area moved to avoid mem leak
* Alan Cox : UDP can count its memory
* Alan Cox : send to an unknown connection causes
* an ECONNREFUSED off the icmp , but
* does NOT close .
* Alan Cox : Switched to new sk_buff handlers . No more backlog !
* Alan Cox : Using generic datagram code . Even smaller and the PEEK
* bug no longer crashes it .
* Fred Van Kempen : Net2e support for sk - > broadcast .
* Alan Cox : Uses skb_free_datagram
* Alan Cox : Added get / set sockopt support .
* Alan Cox : Broadcasting without option set returns EACCES .
* Alan Cox : No wakeup calls . Instead we now use the callbacks .
* Alan Cox : Use ip_tos and ip_ttl
* Alan Cox : SNMP Mibs
* Alan Cox : MSG_DONTROUTE , and 0.0 .0 .0 support .
* Matt Dillon : UDP length checks .
* Alan Cox : Smarter af_inet used properly .
* Alan Cox : Use new kernel side addressing .
* Alan Cox : Incorrect return on truncated datagram receive .
* Arnt Gulbrandsen : New udp_send and stuff
* Alan Cox : Cache last socket
* Alan Cox : Route cache
* Jon Peatfield : Minor efficiency fix to sendto ( ) .
* Mike Shaver : RFC1122 checks .
* Alan Cox : Nonblocking error fix .
* Willy Konynenberg : Transparent proxying support .
* Mike McLagan : Routing by source
* David S . Miller : New socket lookup architecture .
* Last socket cache retained as it
* does have a high hit rate .
* Olaf Kirch : Don ' t linearise iovec on sendmsg .
* Andi Kleen : Some cleanups , cache destination entry
2007-02-09 17:24:47 +03:00
* for connect .
2005-04-17 02:20:36 +04:00
* Vitaly E . Lavrov : Transparent proxy revived after year coma .
* Melvin Smith : Check msg_name not msg_namelen in sendto ( ) ,
* return ENOTCONN for unconnected sockets ( POSIX )
* Janos Farkas : don ' t deliver multi / broadcasts to a different
* bound - to - device socket
* Hirokazu Takahashi : HW checksumming for outgoing UDP
* datagrams .
* Hirokazu Takahashi : sendfile ( ) on UDP works now .
* Arnaldo C . Melo : convert / proc / net / udp to seq_file
* YOSHIFUJI Hideaki @ USAGI and : Support IPV6_V6ONLY socket option , which
* Alexey Kuznetsov : allow both IPv4 and IPv6 sockets to bind
* a single port at the same time .
* Derek Atkins < derek @ ihtfp . com > : Add Encapsulation Support
2007-06-28 02:37:46 +04:00
* James Chapman : Add L2TP encapsulation type .
2005-04-17 02:20:36 +04:00
*
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*/
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
# include <asm/system.h>
# include <asm/uaccess.h>
# include <asm/ioctls.h>
2007-12-31 11:29:24 +03:00
# include <linux/bootmem.h>
2005-04-17 02:20:36 +04:00
# include <linux/types.h>
# include <linux/fcntl.h>
# include <linux/module.h>
# include <linux/socket.h>
# include <linux/sockios.h>
2005-12-27 07:43:12 +03:00
# include <linux/igmp.h>
2005-04-17 02:20:36 +04:00
# include <linux/in.h>
# include <linux/errno.h>
# include <linux/timer.h>
# include <linux/mm.h>
# include <linux/inet.h>
# include <linux/netdevice.h>
2005-08-10 07:08:28 +04:00
# include <net/tcp_states.h>
2005-04-17 02:20:36 +04:00
# include <linux/skbuff.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2007-09-12 14:01:34 +04:00
# include <net/net_namespace.h>
2005-04-17 02:20:36 +04:00
# include <net/icmp.h>
# include <net/route.h>
# include <net/checksum.h>
# include <net/xfrm.h>
2006-11-27 22:10:57 +03:00
# include "udp_impl.h"
2005-04-17 02:20:36 +04:00
/*
* Snmp MIB for the UDP layer
*/
2005-08-26 23:05:31 +04:00
/* UDP MIB counters, exported for use outside this file. */
DEFINE_SNMP_STAT ( struct udp_mib , udp_statistics ) __read_mostly ;
2007-12-11 22:30:32 +03:00
EXPORT_SYMBOL ( udp_statistics ) ;
2005-04-17 02:20:36 +04:00
2007-12-14 22:25:26 +03:00
/* Second set of UDP MIB counters; presumably the IPv6 ones -- TODO confirm. */
DEFINE_SNMP_STAT ( struct udp_mib , udp_stats_in6 ) __read_mostly ;
EXPORT_SYMBOL ( udp_stats_in6 ) ;
2005-04-17 02:20:36 +04:00
/*
 * Socket hash table with UDP_HTABLE_SIZE chains.  udp_hash_lock is taken
 * for writing when udp_lib_get_port() hashes a socket and for reading by
 * the lookup and multicast delivery paths in this file.
 */
struct hlist_head udp_hash [ UDP_HTABLE_SIZE ] ;
DEFINE_RWLOCK ( udp_hash_lock ) ;
2007-12-31 11:29:24 +03:00
/*
 * Exported tunables and accounting counter.  Nothing in this part of the
 * file reads them; presumably they back the UDP memory-accounting sysctls
 * -- verify against the users of these symbols.
 */
int sysctl_udp_mem [ 3 ] __read_mostly ;
int sysctl_udp_rmem_min __read_mostly ;
int sysctl_udp_wmem_min __read_mostly ;
EXPORT_SYMBOL ( sysctl_udp_mem ) ;
EXPORT_SYMBOL ( sysctl_udp_rmem_min ) ;
EXPORT_SYMBOL ( sysctl_udp_wmem_min ) ;
atomic_t udp_memory_allocated ;
EXPORT_SYMBOL ( udp_memory_allocated ) ;
2008-01-31 16:07:57 +03:00
static inline int __udp_lib_lport_inuse ( struct net * net , __u16 num ,
2007-08-25 10:09:41 +04:00
const struct hlist_head udptable [ ] )
2005-04-17 02:20:36 +04:00
{
2006-08-27 07:06:05 +04:00
struct sock * sk ;
2005-04-17 02:20:36 +04:00
struct hlist_node * node ;
2006-08-27 07:06:05 +04:00
2008-06-17 04:12:29 +04:00
sk_for_each ( sk , node , & udptable [ udp_hashfn ( net , num ) ] )
2008-03-25 21:57:35 +03:00
if ( net_eq ( sock_net ( sk ) , net ) & & sk - > sk_hash = = num )
2006-08-27 07:06:05 +04:00
return 1 ;
return 0 ;
}
/**
2008-03-23 02:51:21 +03:00
* udp_lib_get_port - UDP / - Lite port lookup for IPv4 and IPv6
2006-08-27 07:06:05 +04:00
*
* @ sk : socket struct in question
* @ snum : port number to look up
2007-06-06 02:18:43 +04:00
* @ saddr_comp : AF - dependent comparison of bound local IP addresses
2006-08-27 07:06:05 +04:00
*/
2008-03-23 02:51:21 +03:00
int udp_lib_get_port ( struct sock * sk , unsigned short snum ,
2007-06-06 02:18:43 +04:00
int ( * saddr_comp ) ( const struct sock * sk1 ,
const struct sock * sk2 ) )
2006-08-27 07:06:05 +04:00
{
2008-03-23 02:51:21 +03:00
struct hlist_head * udptable = sk - > sk_prot - > h . udp_hash ;
2006-08-27 07:06:05 +04:00
struct hlist_node * node ;
struct hlist_head * head ;
2005-04-17 02:20:36 +04:00
struct sock * sk2 ;
2006-08-27 07:06:05 +04:00
int error = 1 ;
2008-03-25 20:26:21 +03:00
struct net * net = sock_net ( sk ) ;
2005-04-17 02:20:36 +04:00
write_lock_bh ( & udp_hash_lock ) ;
2007-08-25 10:09:41 +04:00
if ( ! snum ) {
2007-10-19 09:00:17 +04:00
int i , low , high , remaining ;
2007-08-25 10:09:41 +04:00
unsigned rover , best , best_size_so_far ;
2007-10-11 04:30:46 +04:00
inet_get_local_port_range ( & low , & high ) ;
2007-10-19 09:00:17 +04:00
remaining = ( high - low ) + 1 ;
2007-10-11 04:30:46 +04:00
2007-08-25 10:09:41 +04:00
best_size_so_far = UINT_MAX ;
2007-10-19 09:00:17 +04:00
best = rover = net_random ( ) % remaining + low ;
2007-08-25 10:09:41 +04:00
/* 1st pass: look for empty (or shortest) hash chain */
for ( i = 0 ; i < UDP_HTABLE_SIZE ; i + + ) {
int size = 0 ;
2008-06-17 04:12:29 +04:00
head = & udptable [ udp_hashfn ( net , rover ) ] ;
2007-08-25 10:09:41 +04:00
if ( hlist_empty ( head ) )
2005-04-17 02:20:36 +04:00
goto gotit ;
2007-08-25 10:09:41 +04:00
2006-12-22 22:42:26 +03:00
sk_for_each ( sk2 , node , head ) {
if ( + + size > = best_size_so_far )
goto next ;
}
best_size_so_far = size ;
2007-08-25 10:09:41 +04:00
best = rover ;
2006-12-22 22:42:26 +03:00
next :
2007-08-25 10:09:41 +04:00
/* fold back if end of range */
if ( + + rover > high )
rover = low + ( ( rover - low )
& ( UDP_HTABLE_SIZE - 1 ) ) ;
2005-04-17 02:20:36 +04:00
}
2007-08-25 10:09:41 +04:00
/* 2nd pass: find hole in shortest hash chain */
rover = best ;
for ( i = 0 ; i < ( 1 < < 16 ) / UDP_HTABLE_SIZE ; i + + ) {
2008-01-31 16:07:57 +03:00
if ( ! __udp_lib_lport_inuse ( net , rover , udptable ) )
2007-08-25 10:09:41 +04:00
goto gotit ;
rover + = UDP_HTABLE_SIZE ;
if ( rover > high )
rover = low + ( ( rover - low )
& ( UDP_HTABLE_SIZE - 1 ) ) ;
2005-04-17 02:20:36 +04:00
}
2007-08-25 10:09:41 +04:00
/* All ports in use! */
goto fail ;
2005-04-17 02:20:36 +04:00
gotit :
2007-08-25 10:09:41 +04:00
snum = rover ;
2005-04-17 02:20:36 +04:00
} else {
2008-06-17 04:12:29 +04:00
head = & udptable [ udp_hashfn ( net , snum ) ] ;
2006-08-27 07:06:05 +04:00
sk_for_each ( sk2 , node , head )
2007-06-06 02:18:43 +04:00
if ( sk2 - > sk_hash = = snum & &
sk2 ! = sk & &
2008-03-25 21:57:35 +03:00
net_eq ( sock_net ( sk2 ) , net ) & &
2007-06-06 02:18:43 +04:00
( ! sk2 - > sk_reuse | | ! sk - > sk_reuse ) & &
( ! sk2 - > sk_bound_dev_if | | ! sk - > sk_bound_dev_if
| | sk2 - > sk_bound_dev_if = = sk - > sk_bound_dev_if ) & &
( * saddr_comp ) ( sk , sk2 ) )
2005-04-17 02:20:36 +04:00
goto fail ;
}
2007-08-25 10:09:41 +04:00
2006-08-27 07:06:05 +04:00
inet_sk ( sk ) - > num = snum ;
2007-06-06 02:18:43 +04:00
sk - > sk_hash = snum ;
2005-04-17 02:20:36 +04:00
if ( sk_unhashed ( sk ) ) {
2008-06-17 04:12:29 +04:00
head = & udptable [ udp_hashfn ( net , snum ) ] ;
2006-08-27 07:06:05 +04:00
sk_add_node ( sk , head ) ;
2008-04-01 06:41:46 +04:00
sock_prot_inuse_add ( sock_net ( sk ) , sk - > sk_prot , 1 ) ;
2005-04-17 02:20:36 +04:00
}
2006-08-27 07:06:05 +04:00
error = 0 ;
2005-04-17 02:20:36 +04:00
fail :
write_unlock_bh ( & udp_hash_lock ) ;
2006-08-27 07:06:05 +04:00
return error ;
}
2008-03-23 02:51:21 +03:00
static int ipv4_rcv_saddr_equal ( const struct sock * sk1 , const struct sock * sk2 )
2008-03-07 03:22:02 +03:00
{
struct inet_sock * inet1 = inet_sk ( sk1 ) , * inet2 = inet_sk ( sk2 ) ;
return ( ! ipv6_only_sock ( sk2 ) & &
( ! inet1 - > rcv_saddr | | ! inet2 - > rcv_saddr | |
inet1 - > rcv_saddr = = inet2 - > rcv_saddr ) ) ;
}
2008-03-23 02:51:21 +03:00
int udp_v4_get_port ( struct sock * sk , unsigned short snum )
2008-03-07 03:22:02 +03:00
{
2008-03-23 02:51:21 +03:00
return udp_lib_get_port ( sk , snum , ipv4_rcv_saddr_equal ) ;
2008-03-07 03:22:02 +03:00
}
/*
 * Find the socket that best matches a datagram from saddr:sport to
 * daddr:dport received on interface @dif.
 *
 * Each candidate on the chain for the destination port earns 1 point for
 * being PF_INET and 2 points for every bound attribute (local address,
 * peer address, peer port, device) that matches; a bound attribute that
 * does not match disqualifies the socket.  A score of 9 ends the search
 * immediately, otherwise the first socket with the highest score wins.
 * UDP is nearly always wildcards, so nothing more elaborate is attempted.
 *
 * Returns the socket with a reference taken (sock_hold), or NULL.
 */
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
				      __be16 sport, __be32 daddr, __be16 dport,
				      int dif, struct hlist_head udptable[])
{
	unsigned short hnum = ntohs(dport);
	struct sock *sk, *best = NULL;
	struct hlist_node *node;
	int best_score = -1;

	read_lock(&udp_hash_lock);

	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
		struct inet_sock *inet = inet_sk(sk);
		int score;

		if (!net_eq(sock_net(sk), net) || sk->sk_hash != hnum ||
		    ipv6_only_sock(sk))
			continue;

		score = (sk->sk_family == PF_INET) ? 1 : 0;

		if (inet->rcv_saddr) {
			if (inet->rcv_saddr != daddr)
				continue;
			score += 2;
		}
		if (inet->daddr) {
			if (inet->daddr != saddr)
				continue;
			score += 2;
		}
		if (inet->dport) {
			if (inet->dport != sport)
				continue;
			score += 2;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				continue;
			score += 2;
		}

		/* Fully specified match: nothing can beat it. */
		if (score == 9) {
			best = sk;
			break;
		}
		if (score > best_score) {
			best = sk;
			best_score = score;
		}
	}

	if (best)
		sock_hold(best);
	read_unlock(&udp_hash_lock);

	return best;
}
/*
 * Starting at @sk (inclusive), return the next socket on the hash chain
 * that should receive a multicast/broadcast datagram addressed to
 * loc_addr:loc_port from rmt_addr:rmt_port on interface @dif, or NULL when
 * the chain is exhausted.  A socket is skipped when its port differs, when
 * a bound peer address/port, local address or device does not match, when
 * it is IPv6-only, or when ip_mc_sf_allow() rejects the source.
 */
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
					     __be16 loc_port, __be32 loc_addr,
					     __be16 rmt_port, __be32 rmt_addr,
					     int dif)
{
	unsigned short hnum = ntohs(loc_port);
	struct hlist_node *node;
	struct sock *s = sk;

	sk_for_each_from(s, node) {
		struct inet_sock *inet = inet_sk(s);

		if (s->sk_hash != hnum)
			continue;
		if (inet->daddr && inet->daddr != rmt_addr)
			continue;
		if (inet->dport && inet->dport != rmt_port)
			continue;
		if (inet->rcv_saddr && inet->rcv_saddr != loc_addr)
			continue;
		if (ipv6_only_sock(s))
			continue;
		if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
			continue;
		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
			continue;

		return s;
	}

	return NULL;
}
/*
 * ICMP error handler for UDP-style sockets.
 *
 * skb->data points at the IP header of the packet that triggered the
 * error; the UDP header follows it after ihl * 4 bytes.  The socket that
 * sent the packet is looked up with source and destination swapped.  If
 * none is found, ICMP_MIB_INERRORS is bumped and the error is dropped.
 *
 * The ICMP type/code is translated into an errno.  Source quench, and
 * "fragmentation needed" on a socket with path-MTU discovery disabled,
 * are ignored.  With IP_RECVERR set the error is queued through
 * ip_icmp_error(); without it only hard errors on an established socket
 * are reported.  The error then goes into sk_err and the socket's error
 * callback runs.
 */
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct udphdr *uh = (struct udphdr *)(skb->data + (iph->ihl << 2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct inet_sock *inet;
	struct sock *sk;
	int harderr = 0;
	int err = 0;

	sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
			       iph->saddr, uh->source, skb->dev->ifindex,
			       udptable);
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;	/* No socket for error */
	}

	inet = inet_sk(sk);

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) {	/* Path MTU discovery */
			if (inet->pmtudisc == IP_PMTUDISC_DONT)
				goto out;
			err = EMSGSIZE;
			harderr = 1;
			break;
		}
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		} else {
			err = EHOSTUNREACH;
		}
		break;
	case ICMP_TIME_EXCEEDED:
	default:
		err = EHOSTUNREACH;
		break;
	}

	/*
	 * RFC1122: OK.  Passes ICMP errors back to application, as per
	 * 4.1.3.3.
	 */
	if (inet->recverr)
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh + 1));
	else if (!harderr || sk->sk_state != TCP_ESTABLISHED)
		goto out;

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	sock_put(sk);
}
/* ICMP error entry point for plain UDP: runs the handler on udp_hash. */
void udp_err(struct sk_buff *skb, u32 info)
{
	struct hlist_head *table = udp_hash;

	__udp4_lib_err(skb, info, table);
}
/*
* Throw away all pending data and cancel the corking . Socket is locked .
*/
2008-06-04 15:49:07 +04:00
void udp_flush_pending_frames ( struct sock * sk )
2008-03-07 03:22:02 +03:00
{
struct udp_sock * up = udp_sk ( sk ) ;
if ( up - > pending ) {
up - > len = 0 ;
up - > pending = 0 ;
ip_flush_pending_frames ( sk ) ;
}
}
2008-06-04 15:49:07 +04:00
EXPORT_SYMBOL ( udp_flush_pending_frames ) ;
2008-03-07 03:22:02 +03:00
/**
 *	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
 *	@sk:	socket we are sending on
 *	@skb:	sk_buff containing the filled-in UDP header
 *		(checksum field must be zeroed out)
 *	@src:	source IPv4 address for the pseudo-header
 *	@dst:	destination IPv4 address for the pseudo-header
 *	@len:	length fed into the pseudo-header checksum
 *
 *	With a single buffer on the write queue only the checksum start and
 *	offset are recorded and the complemented pseudo-header sum is stored
 *	in the header.  With several buffers the checksum is computed here
 *	instead: the first buffer is summed from its transport header, the
 *	per-buffer sums of the whole queue are added up, and a result of 0
 *	is replaced by CSUM_MANGLED_0.
 */
static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 __be32 src, __be32 dst, int len)
{
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frag;
	unsigned int offset;
	__wsum csum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Only one fragment on the socket. */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
		return;
	}

	/*
	 * Two or more fragments are queued, so the checksum cannot be left
	 * to the hardware: the sums of all buffers have to be combined.
	 */
	offset = skb_transport_offset(skb);
	skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
	skb->ip_summed = CHECKSUM_NONE;

	skb_queue_walk(&sk->sk_write_queue, frag)
		csum = csum_add(csum, frag->csum);

	uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;
}
/*
* Push out all pending data as one UDP datagram . Socket is locked .
*/
static int udp_push_pending_frames ( struct sock * sk )
{
struct udp_sock * up = udp_sk ( sk ) ;
struct inet_sock * inet = inet_sk ( sk ) ;
struct flowi * fl = & inet - > cork . fl ;
struct sk_buff * skb ;
struct udphdr * uh ;
int err = 0 ;
int is_udplite = IS_UDPLITE ( sk ) ;
__wsum csum = 0 ;
/* Grab the skbuff where UDP header space exists. */
if ( ( skb = skb_peek ( & sk - > sk_write_queue ) ) = = NULL )
goto out ;
/*
* Create a UDP header
*/
uh = udp_hdr ( skb ) ;
uh - > source = fl - > fl_ip_sport ;
uh - > dest = fl - > fl_ip_dport ;
uh - > len = htons ( up - > len ) ;
uh - > check = 0 ;
if ( is_udplite ) /* UDP-Lite */
csum = udplite_csum_outgoing ( sk , skb ) ;
else if ( sk - > sk_no_check = = UDP_CSUM_NOXMIT ) { /* UDP csum disabled */
skb - > ip_summed = CHECKSUM_NONE ;
goto send ;
} else if ( skb - > ip_summed = = CHECKSUM_PARTIAL ) { /* UDP hardware csum */
udp4_hwcsum_outgoing ( sk , skb , fl - > fl4_src , fl - > fl4_dst , up - > len ) ;
goto send ;
} else /* `normal' UDP */
csum = udp_csum_outgoing ( sk , skb ) ;
/* add protocol-dependent pseudo-header */
uh - > check = csum_tcpudp_magic ( fl - > fl4_src , fl - > fl4_dst , up - > len ,
sk - > sk_protocol , csum ) ;
if ( uh - > check = = 0 )
uh - > check = CSUM_MANGLED_0 ;
send :
err = ip_push_pending_frames ( sk ) ;
out :
up - > len = 0 ;
up - > pending = 0 ;
if ( ! err )
2008-07-06 08:18:07 +04:00
UDP_INC_STATS_USER ( sock_net ( sk ) ,
UDP_MIB_OUTDATAGRAMS , is_udplite ) ;
2008-03-07 03:22:02 +03:00
return err ;
}
/**
 *	udp_sendmsg  -  send, or append to a corked, UDP/UDP-Lite datagram
 *	@iocb:	not used by this function
 *	@sk:	sending socket
 *	@msg:	message header (optional destination, control data, iovec,
 *		flags)
 *	@len:	payload length in bytes
 *
 *	If the socket already has pending (corked) AF_INET data, the payload
 *	is appended to it.  Otherwise the destination comes from @msg or,
 *	for a connected socket, from the socket itself; control messages
 *	and IP options are applied, a route is found (the cached one is
 *	reused for a connected socket when nothing forces a fresh lookup),
 *	and the socket is corked before the data is appended.  Unless MSG_MORE
 *	or the cork flag asks to keep collecting, the datagram is pushed out
 *	at once.
 *
 *	Returns @len on success.  Errors include -EMSGSIZE (@len > 0xFFFF),
 *	-EOPNOTSUPP (MSG_OOB), -EINVAL (short or port-less address, pending
 *	data of another family, source routing without a destination, socket
 *	corked concurrently), -EAFNOSUPPORT, -EDESTADDRREQ, -EACCES
 *	(broadcast route without SOCK_BROADCAST), plus whatever
 *	ip_cmsg_send(), ip_route_output_flow(), ip_append_data() or the push
 *	return.
 */
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct udp_sock *up = udp_sk(sk);
	int ulen = len;
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;		/* set when ipc.opt was allocated for us */
	int connected = 0;
	__be32 daddr, faddr, saddr;
	__be16 dport;
	u8 tos;
	int err, is_udplite = IS_UDPLITE(sk);
	int corkreq = up->corkflag || (msg->msg_flags & MSG_MORE);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);

	if (len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	ipc.opt = NULL;

	if (up->pending) {
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET)) {
				release_sock(sk);
				return -EINVAL;
			}
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	/*
	 *	Get and verify the address.
	 */
	if (msg->msg_name) {
		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;

		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EAFNOSUPPORT;
		}

		daddr = usin->sin_addr.s_addr;
		dport = usin->sin_port;
		if (dport == 0)
			return -EINVAL;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = inet->daddr;
		dport = inet->dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
	ipc.addr = inet->saddr;

	ipc.oif = sk->sk_bound_dev_if;
	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err)
			return err;
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt)
		ipc.opt = inet->opt;

	saddr = ipc.addr;
	ipc.addr = faddr = daddr;

	if (ipc.opt && ipc.opt->srr) {
		if (!daddr) {
			/*
			 * ipc.opt may have been allocated by ip_cmsg_send()
			 * (free == 1); a bare return would leak it.  Leave
			 * through "out" so it is freed.  rt is still NULL
			 * here, just as when the route lookup below fails
			 * and jumps to the same label.
			 */
			err = -EINVAL;
			goto out;
		}
		faddr = ipc.opt->faddr;
		connected = 0;
	}
	tos = RT_TOS(inet->tos);
	if (sock_flag(sk, SOCK_LOCALROUTE) ||
	    (msg->msg_flags & MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->is_strictroute)) {
		tos |= RTO_ONLINK;
		connected = 0;
	}

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
		connected = 0;
	}

	/* Try the route cached on a connected socket first. */
	if (connected)
		rt = (struct rtable *)sk_dst_check(sk, 0);

	if (rt == NULL) {
		struct flowi fl = { .oif = ipc.oif,
				    .nl_u = { .ip4_u =
					      { .daddr = faddr,
						.saddr = saddr,
						.tos = tos } },
				    .proto = sk->sk_protocol,
				    .uli_u = { .ports =
					       { .sport = inet->sport,
						 .dport = dport } } };
		security_sk_classify_flow(sk, &fl);
		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
		if (err) {
			if (err == -ENETUNREACH)
				IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
			goto out;
		}

		err = -EACCES;
		if ((rt->rt_flags & RTCF_BROADCAST) &&
		    !sock_flag(sk, SOCK_BROADCAST))
			goto out;
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->u.dst));
	}

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	saddr = rt->rt_src;
	if (!ipc.addr)
		daddr = ipc.addr = rt->rt_dst;

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG " udp cork app bug 2 \n ");
		err = -EINVAL;
		goto out;
	}
	/*
	 *	Now cork the socket to pend data.
	 */
	inet->cork.fl.fl4_dst = daddr;
	inet->cork.fl.fl_ip_dport = dport;
	inet->cork.fl.fl4_src = saddr;
	inet->cork.fl.fl_ip_sport = inet->sport;
	up->pending = AF_INET;

do_append_data:
	up->len += ulen;
	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
	err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
			     sizeof(struct udphdr), &ipc, rt,
			     corkreq ? msg->msg_flags | MSG_MORE : msg->msg_flags);
	if (err)
		udp_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		up->pending = 0;
	release_sock(sk);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things).  We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP_INC_STATS_USER(sock_net(sk),
				   UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	dst_confirm(&rt->u.dst);
	/* A zero-length MSG_PROBE only confirms the route and sends nothing. */
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
/*
 * Append a page to the datagram being built on @sk.
 *
 * When nothing is pending yet, a zero-length udp_sendmsg() with MSG_MORE
 * is issued first so the socket gets corked with its destination; that
 * only succeeds on a connected socket because no address can be passed
 * here.  The page is then added with ip_append_page(), falling back to
 * sock_no_sendpage() on -EOPNOTSUPP.  Unless corking or MSG_MORE asks to
 * wait, the datagram is pushed out.
 *
 * Returns @size on success, otherwise a negative error.
 */
int udp_sendpage(struct sock *sk, struct page *page, int offset,
		 size_t size, int flags)
{
	struct udp_sock *up = udp_sk(sk);
	int ret;

	if (!up->pending) {
		struct msghdr msg = { .msg_flags = flags | MSG_MORE };

		ret = udp_sendmsg(NULL, sk, &msg, 0);
		if (ret < 0)
			return ret;
	}

	lock_sock(sk);

	if (unlikely(!up->pending)) {
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG " udp cork app bug 3 \n ");
		return -EINVAL;
	}

	ret = ip_append_page(sk, page, offset, size, flags);
	if (ret < 0) {
		if (ret == -EOPNOTSUPP) {
			release_sock(sk);
			return sock_no_sendpage(sk->sk_socket, page, offset,
						size, flags);
		}
		udp_flush_pending_frames(sk);
		goto out;
	}

	up->len += size;
	if (!up->corkflag && !(flags & MSG_MORE))
		ret = udp_push_pending_frames(sk);
	if (ret == 0)
		ret = size;
out:
	release_sock(sk);
	return ret;
}
2005-04-17 02:20:36 +04:00
/*
* IOCTL requests applicable to the UDP protocol
*/
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
int udp_ioctl ( struct sock * sk , int cmd , unsigned long arg )
{
2007-03-09 07:41:55 +03:00
switch ( cmd ) {
case SIOCOUTQ :
2005-04-17 02:20:36 +04:00
{
2007-03-09 07:41:55 +03:00
int amount = atomic_read ( & sk - > sk_wmem_alloc ) ;
return put_user ( amount , ( int __user * ) arg ) ;
}
2005-04-17 02:20:36 +04:00
2007-03-09 07:41:55 +03:00
case SIOCINQ :
{
struct sk_buff * skb ;
unsigned long amount ;
amount = 0 ;
spin_lock_bh ( & sk - > sk_receive_queue . lock ) ;
skb = skb_peek ( & sk - > sk_receive_queue ) ;
if ( skb ! = NULL ) {
/*
* We will only return the amount
* of this packet since that is all
* that will be read .
*/
amount = skb - > len - sizeof ( struct udphdr ) ;
2005-04-17 02:20:36 +04:00
}
2007-03-09 07:41:55 +03:00
spin_unlock_bh ( & sk - > sk_receive_queue . lock ) ;
return put_user ( amount , ( int __user * ) arg ) ;
}
2005-04-17 02:20:36 +04:00
2007-03-09 07:41:55 +03:00
default :
return - ENOIOCTLCMD ;
2005-04-17 02:20:36 +04:00
}
2007-03-09 07:41:55 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2008-03-07 03:22:02 +03:00
/*
 * Receive one datagram: return it if one is queued, otherwise let
 * __skb_recv_datagram() block (unless non-blocking).
 *
 * MSG_ERRQUEUE requests go to ip_recv_error().  At most @len payload
 * bytes are copied; a shorter buffer sets MSG_TRUNC in msg->msg_flags.
 * The checksum is verified before the copy when the datagram is truncated
 * or has partial coverage, and otherwise during the copy when still
 * needed.  A datagram that fails the checksum is discarded and the
 * receive is retried, or -EAGAIN is returned when non-blocking.  On
 * success the sender address is stored in msg->msg_name if given.
 *
 * Returns the number of bytes copied (the full payload length when
 * MSG_TRUNC was passed in @flags) or a negative error.
 */
int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len, int noblock, int flags, int *addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
	int is_udplite = IS_UDPLITE(sk);
	unsigned int ulen, copied;
	struct sk_buff *skb;
	int peeked;
	int err;

	/* Report the address size up front. */
	if (addr_len)
		*addr_len = sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

try_again:
	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  &peeked, &err);
	if (!skb)
		goto out;

	ulen = skb->len - sizeof(struct udphdr);
	copied = len;
	if (copied > ulen)
		copied = ulen;
	else if (copied < ulen)
		msg->msg_flags |= MSG_TRUNC;

	/*
	 * A truncated copy or a partial-coverage (UDP-Lite) checksum cannot
	 * be verified while copying, so verify it beforehand.
	 */
	if ((copied < ulen || UDP_SKB_CB(skb)->partial_cov) &&
	    udp_lib_checksum_complete(skb))
		goto csum_copy_err;

	if (skb_csum_unnecessary(skb)) {
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
					      msg->msg_iov, copied);
	} else {
		err = skb_copy_and_csum_datagram_iovec(skb,
						       sizeof(struct udphdr),
						       msg->msg_iov);
		if (err == -EINVAL)
			goto csum_copy_err;
	}

	if (err)
		goto out_free;

	if (!peeked)
		UDP_INC_STATS_USER(sock_net(sk),
				   UDP_MIB_INDATAGRAMS, is_udplite);

	sock_recv_timestamp(msg, sk, skb);

	/* Hand back the sender's address. */
	if (sin) {
		sin->sin_family = AF_INET;
		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (inet->cmsg_flags)
		ip_cmsg_recv(msg, skb);

	err = (flags & MSG_TRUNC) ? ulen : copied;

out_free:
	lock_sock(sk);
	skb_free_datagram(sk, skb);
	release_sock(sk);
out:
	return err;

csum_copy_err:
	lock_sock(sk);
	if (!skb_kill_datagram(sk, skb, flags))
		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	release_sock(sk);

	if (noblock)
		return -EAGAIN;
	goto try_again;
}
2005-04-17 02:20:36 +04:00
int udp_disconnect ( struct sock * sk , int flags )
{
struct inet_sock * inet = inet_sk ( sk ) ;
/*
* 1003.1 g - break association .
*/
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
sk - > sk_state = TCP_CLOSE ;
inet - > daddr = 0 ;
inet - > dport = 0 ;
sk - > sk_bound_dev_if = 0 ;
if ( ! ( sk - > sk_userlocks & SOCK_BINDADDR_LOCK ) )
inet_reset_saddr ( sk ) ;
if ( ! ( sk - > sk_userlocks & SOCK_BINDPORT_LOCK ) ) {
sk - > sk_prot - > unhash ( sk ) ;
inet - > sport = 0 ;
}
sk_dst_reset ( sk ) ;
return 0 ;
}
2008-03-07 03:22:02 +03:00
/* returns:
* - 1 : error
* 0 : success
* > 0 : " udp encap " protocol resubmission
*
* Note that in the success and error cases , the skb is assumed to
* have either been requeued or freed .
*/
int udp_queue_rcv_skb ( struct sock * sk , struct sk_buff * skb )
{
struct udp_sock * up = udp_sk ( sk ) ;
int rc ;
int is_udplite = IS_UDPLITE ( sk ) ;
/*
* Charge it to the socket , dropping if the queue is full .
*/
if ( ! xfrm4_policy_check ( sk , XFRM_POLICY_IN , skb ) )
goto drop ;
nf_reset ( skb ) ;
if ( up - > encap_type ) {
/*
* This is an encapsulation socket so pass the skb to
* the socket ' s udp_encap_rcv ( ) hook . Otherwise , just
* fall through and pass this up the UDP socket .
* up - > encap_rcv ( ) returns the following value :
* = 0 if skb was successfully passed to the encap
* handler or was discarded by it .
* > 0 if skb should be passed on to UDP .
* < 0 if skb should be resubmitted as proto - N
*/
/* if we're overly short, let UDP handle it */
if ( skb - > len > sizeof ( struct udphdr ) & &
up - > encap_rcv ! = NULL ) {
int ret ;
ret = ( * up - > encap_rcv ) ( sk , skb ) ;
if ( ret < = 0 ) {
2008-07-06 08:18:48 +04:00
UDP_INC_STATS_BH ( sock_net ( sk ) ,
UDP_MIB_INDATAGRAMS ,
2008-03-07 03:22:02 +03:00
is_udplite ) ;
return - ret ;
}
}
/* FALLTHROUGH -- it's a UDP Packet */
}
/*
* UDP - Lite specific tests , ignored on UDP sockets
*/
if ( ( is_udplite & UDPLITE_RECV_CC ) & & UDP_SKB_CB ( skb ) - > partial_cov ) {
/*
* MIB statistics other than incrementing the error count are
* disabled for the following two types of errors : these depend
* on the application settings , not on the functioning of the
* protocol stack as such .
*
* RFC 3828 here recommends ( sec 3.3 ) : " There should also be a
* way . . . to . . . at least let the receiving application block
* delivery of packets with coverage values less than a value
* provided by the application . "
*/
if ( up - > pcrlen = = 0 ) { /* full coverage was set */
LIMIT_NETDEBUG ( KERN_WARNING " UDPLITE: partial coverage "
" %d while full coverage %d requested \n " ,
UDP_SKB_CB ( skb ) - > cscov , skb - > len ) ;
goto drop ;
}
/* The next case involves violating the min. coverage requested
* by the receiver . This is subtle : if receiver wants x and x is
* greater than the buffersize / MTU then receiver will complain
* that it wants x while sender emits packets of smaller size y .
* Therefore the above . . . ( ) - > partial_cov statement is essential .
*/
if ( UDP_SKB_CB ( skb ) - > cscov < up - > pcrlen ) {
LIMIT_NETDEBUG ( KERN_WARNING
" UDPLITE: coverage %d too small, need min %d \n " ,
UDP_SKB_CB ( skb ) - > cscov , up - > pcrlen ) ;
goto drop ;
}
}
if ( sk - > sk_filter ) {
if ( udp_lib_checksum_complete ( skb ) )
goto drop ;
}
if ( ( rc = sock_queue_rcv_skb ( sk , skb ) ) < 0 ) {
/* Note that an ENOMEM error is charged twice */
2008-06-18 08:04:56 +04:00
if ( rc = = - ENOMEM ) {
2008-07-06 08:18:48 +04:00
UDP_INC_STATS_BH ( sock_net ( sk ) ,
UDP_MIB_RCVBUFERRORS , is_udplite ) ;
2008-06-18 08:04:56 +04:00
atomic_inc ( & sk - > sk_drops ) ;
}
2008-03-07 03:22:02 +03:00
goto drop ;
}
return 0 ;
drop :
2008-07-06 08:18:48 +04:00
UDP_INC_STATS_BH ( sock_net ( sk ) , UDP_MIB_INERRORS , is_udplite ) ;
2008-03-07 03:22:02 +03:00
kfree_skb ( skb ) ;
return - 1 ;
}
/*
* Multicasts and broadcasts go to each listener .
*
* Note : called only from the BH handler context ,
* so we don ' t need to lock the hashes .
*/
2008-06-17 04:12:11 +04:00
static int __udp4_lib_mcast_deliver ( struct net * net , struct sk_buff * skb ,
2008-03-07 03:22:02 +03:00
struct udphdr * uh ,
__be32 saddr , __be32 daddr ,
struct hlist_head udptable [ ] )
{
struct sock * sk ;
int dif ;
read_lock ( & udp_hash_lock ) ;
2008-06-17 04:12:29 +04:00
sk = sk_head ( & udptable [ udp_hashfn ( net , ntohs ( uh - > dest ) ) ] ) ;
2008-03-07 03:22:02 +03:00
dif = skb - > dev - > ifindex ;
sk = udp_v4_mcast_next ( sk , uh - > dest , daddr , uh - > source , saddr , dif ) ;
if ( sk ) {
struct sock * sknext = NULL ;
do {
struct sk_buff * skb1 = skb ;
sknext = udp_v4_mcast_next ( sk_next ( sk ) , uh - > dest , daddr ,
uh - > source , saddr , dif ) ;
if ( sknext )
skb1 = skb_clone ( skb , GFP_ATOMIC ) ;
if ( skb1 ) {
int ret = 0 ;
bh_lock_sock_nested ( sk ) ;
if ( ! sock_owned_by_user ( sk ) )
ret = udp_queue_rcv_skb ( sk , skb1 ) ;
else
sk_add_backlog ( sk , skb1 ) ;
bh_unlock_sock ( sk ) ;
if ( ret > 0 )
/* we should probably re-process instead
* of dropping packets here . */
kfree_skb ( skb1 ) ;
}
sk = sknext ;
} while ( sknext ) ;
} else
kfree_skb ( skb ) ;
read_unlock ( & udp_hash_lock ) ;
return 0 ;
}
/* Initialize UDP checksum. If exited with zero value (success),
* CHECKSUM_UNNECESSARY means , that no more checks are required .
* Otherwise , csum completion requires chacksumming packet body ,
* including udp header and folding it to skb - > csum .
*/
static inline int udp4_csum_init ( struct sk_buff * skb , struct udphdr * uh ,
int proto )
{
const struct iphdr * iph ;
int err ;
UDP_SKB_CB ( skb ) - > partial_cov = 0 ;
UDP_SKB_CB ( skb ) - > cscov = skb - > len ;
if ( proto = = IPPROTO_UDPLITE ) {
err = udplite_checksum_init ( skb , uh ) ;
if ( err )
return err ;
}
iph = ip_hdr ( skb ) ;
if ( uh - > check = = 0 ) {
skb - > ip_summed = CHECKSUM_UNNECESSARY ;
} else if ( skb - > ip_summed = = CHECKSUM_COMPLETE ) {
if ( ! csum_tcpudp_magic ( iph - > saddr , iph - > daddr , skb - > len ,
proto , skb - > csum ) )
skb - > ip_summed = CHECKSUM_UNNECESSARY ;
}
if ( ! skb_csum_unnecessary ( skb ) )
skb - > csum = csum_tcpudp_nofold ( iph - > saddr , iph - > daddr ,
skb - > len , proto , 0 ) ;
/* Probably, we should checksum udp header (it should be in cache
* in any case ) and data in tiny packets ( < rx copybreak ) .
*/
return 0 ;
}
/*
* All we need to do is get the socket , and then do a checksum .
*/
int __udp4_lib_rcv ( struct sk_buff * skb , struct hlist_head udptable [ ] ,
int proto )
{
struct sock * sk ;
struct udphdr * uh = udp_hdr ( skb ) ;
unsigned short ulen ;
struct rtable * rt = ( struct rtable * ) skb - > dst ;
__be32 saddr = ip_hdr ( skb ) - > saddr ;
__be32 daddr = ip_hdr ( skb ) - > daddr ;
2008-07-06 08:18:48 +04:00
struct net * net = dev_net ( skb - > dev ) ;
2008-03-07 03:22:02 +03:00
/*
* Validate the packet .
*/
if ( ! pskb_may_pull ( skb , sizeof ( struct udphdr ) ) )
goto drop ; /* No space for header. */
ulen = ntohs ( uh - > len ) ;
if ( ulen > skb - > len )
goto short_packet ;
if ( proto = = IPPROTO_UDP ) {
/* UDP validates ulen. */
if ( ulen < sizeof ( * uh ) | | pskb_trim_rcsum ( skb , ulen ) )
goto short_packet ;
uh = udp_hdr ( skb ) ;
}
if ( udp4_csum_init ( skb , uh , proto ) )
goto csum_error ;
if ( rt - > rt_flags & ( RTCF_BROADCAST | RTCF_MULTICAST ) )
2008-06-17 04:12:11 +04:00
return __udp4_lib_mcast_deliver ( net , skb , uh ,
saddr , daddr , udptable ) ;
2008-03-07 03:22:02 +03:00
2008-06-17 04:12:11 +04:00
sk = __udp4_lib_lookup ( net , saddr , uh - > source , daddr ,
2008-03-07 03:22:02 +03:00
uh - > dest , inet_iif ( skb ) , udptable ) ;
if ( sk ! = NULL ) {
int ret = 0 ;
bh_lock_sock_nested ( sk ) ;
if ( ! sock_owned_by_user ( sk ) )
ret = udp_queue_rcv_skb ( sk , skb ) ;
else
sk_add_backlog ( sk , skb ) ;
bh_unlock_sock ( sk ) ;
sock_put ( sk ) ;
/* a return value > 0 means to resubmit the input, but
* it wants the return to be - protocol , or 0
*/
if ( ret > 0 )
return - ret ;
return 0 ;
}
if ( ! xfrm4_policy_check ( NULL , XFRM_POLICY_IN , skb ) )
goto drop ;
nf_reset ( skb ) ;
/* No socket. Drop packet silently, if checksum is wrong */
if ( udp_lib_checksum_complete ( skb ) )
goto csum_error ;
2008-07-06 08:18:48 +04:00
UDP_INC_STATS_BH ( net , UDP_MIB_NOPORTS , proto = = IPPROTO_UDPLITE ) ;
2008-03-07 03:22:02 +03:00
icmp_send ( skb , ICMP_DEST_UNREACH , ICMP_PORT_UNREACH , 0 ) ;
/*
* Hmm . We got an UDP packet to a port to which we
* don ' t wanna listen . Ignore it .
*/
kfree_skb ( skb ) ;
return 0 ;
short_packet :
2008-04-14 15:09:00 +04:00
LIMIT_NETDEBUG ( KERN_DEBUG " UDP%s: short packet: From " NIPQUAD_FMT " :%u %d/%d to " NIPQUAD_FMT " :%u \n " ,
2008-03-07 03:22:02 +03:00
proto = = IPPROTO_UDPLITE ? " -Lite " : " " ,
NIPQUAD ( saddr ) ,
ntohs ( uh - > source ) ,
ulen ,
skb - > len ,
NIPQUAD ( daddr ) ,
ntohs ( uh - > dest ) ) ;
goto drop ;
csum_error :
/*
* RFC1122 : OK . Discards the bad packet silently ( as far as
* the network is concerned , anyway ) as per 4.1 .3 .4 ( MUST ) .
*/
2008-04-14 15:09:00 +04:00
LIMIT_NETDEBUG ( KERN_DEBUG " UDP%s: bad checksum. From " NIPQUAD_FMT " :%u to " NIPQUAD_FMT " :%u ulen %d \n " ,
2008-03-07 03:22:02 +03:00
proto = = IPPROTO_UDPLITE ? " -Lite " : " " ,
NIPQUAD ( saddr ) ,
ntohs ( uh - > source ) ,
NIPQUAD ( daddr ) ,
ntohs ( uh - > dest ) ,
ulen ) ;
drop :
2008-07-06 08:18:48 +04:00
UDP_INC_STATS_BH ( net , UDP_MIB_INERRORS , proto = = IPPROTO_UDPLITE ) ;
2008-03-07 03:22:02 +03:00
kfree_skb ( skb ) ;
return 0 ;
}
/* Receive handler for plain UDP: the shared receive path run against
 * the udp_hash table with proto IPPROTO_UDP.
 */
int udp_rcv(struct sk_buff *skb)
{
	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
2008-06-15 04:04:49 +04:00
/* Socket destructor hook: flushes any pending frames while holding
 * the socket lock.
 */
void udp_destroy_sock(struct sock *sk)
{
	lock_sock(sk);
	udp_flush_pending_frames(sk);
	release_sock(sk);
}
2005-04-17 02:20:36 +04:00
/*
* Socket option code for UDP
*/
2006-11-27 20:29:59 +03:00
int udp_lib_setsockopt ( struct sock * sk , int level , int optname ,
char __user * optval , int optlen ,
int ( * push_pending_frames ) ( struct sock * ) )
2005-04-17 02:20:36 +04:00
{
struct udp_sock * up = udp_sk ( sk ) ;
int val ;
int err = 0 ;
2007-12-03 14:34:16 +03:00
int is_udplite = IS_UDPLITE ( sk ) ;
2005-04-17 02:20:36 +04:00
2007-03-09 07:41:55 +03:00
if ( optlen < sizeof ( int ) )
2005-04-17 02:20:36 +04:00
return - EINVAL ;
if ( get_user ( val , ( int __user * ) optval ) )
return - EFAULT ;
2007-03-09 07:41:55 +03:00
switch ( optname ) {
2005-04-17 02:20:36 +04:00
case UDP_CORK :
if ( val ! = 0 ) {
up - > corkflag = 1 ;
} else {
up - > corkflag = 0 ;
lock_sock ( sk ) ;
2006-11-27 20:29:59 +03:00
( * push_pending_frames ) ( sk ) ;
2005-04-17 02:20:36 +04:00
release_sock ( sk ) ;
}
break ;
2007-02-09 17:24:47 +03:00
2005-04-17 02:20:36 +04:00
case UDP_ENCAP :
switch ( val ) {
case 0 :
case UDP_ENCAP_ESPINUDP :
case UDP_ENCAP_ESPINUDP_NON_IKE :
2007-07-06 04:08:05 +04:00
up - > encap_rcv = xfrm4_udp_encap_rcv ;
/* FALLTHROUGH */
2007-06-28 02:37:46 +04:00
case UDP_ENCAP_L2TPINUDP :
2005-04-17 02:20:36 +04:00
up - > encap_type = val ;
break ;
default :
err = - ENOPROTOOPT ;
break ;
}
break ;
2006-11-27 22:10:57 +03:00
/*
* UDP - Lite ' s partial checksum coverage ( RFC 3828 ) .
*/
/* The sender sets actual checksum coverage length via this option.
* The case coverage > packet length is handled by send module . */
case UDPLITE_SEND_CSCOV :
2007-12-03 14:34:16 +03:00
if ( ! is_udplite ) /* Disable the option on UDP sockets */
2006-11-27 22:10:57 +03:00
return - ENOPROTOOPT ;
if ( val ! = 0 & & val < 8 ) /* Illegal coverage: use default (8) */
val = 8 ;
up - > pcslen = val ;
up - > pcflag | = UDPLITE_SEND_CC ;
break ;
2007-02-09 17:24:47 +03:00
/* The receiver specifies a minimum checksum coverage value. To make
* sense , this should be set to at least 8 ( as done below ) . If zero is
2006-11-27 22:10:57 +03:00
* used , this again means full checksum coverage . */
case UDPLITE_RECV_CSCOV :
2007-12-03 14:34:16 +03:00
if ( ! is_udplite ) /* Disable the option on UDP sockets */
2006-11-27 22:10:57 +03:00
return - ENOPROTOOPT ;
if ( val ! = 0 & & val < 8 ) /* Avoid silly minimal values. */
val = 8 ;
up - > pcrlen = val ;
up - > pcflag | = UDPLITE_RECV_CC ;
break ;
2005-04-17 02:20:36 +04:00
default :
err = - ENOPROTOOPT ;
break ;
2007-03-09 07:41:55 +03:00
}
2005-04-17 02:20:36 +04:00
return err ;
}
2008-03-07 03:22:02 +03:00
/* setsockopt entry for UDP sockets: SOL_UDP / SOL_UDPLITE options are
 * handled by udp_lib_setsockopt(), every other level is passed on to
 * ip_setsockopt().
 */
int udp_setsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return ip_setsockopt(sk, level, optname, optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#ifdef CONFIG_COMPAT
/* Compat variant of udp_setsockopt(): identical for the UDP levels,
 * other levels go to compat_ip_setsockopt().
 */
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_setsockopt(sk, level, optname, optval, optlen);

	return udp_lib_setsockopt(sk, level, optname, optval, optlen,
				  udp_push_pending_frames);
}
#endif
2006-11-27 20:29:59 +03:00
/*
 * Read a UDP / UDP-Lite level socket option.
 *
 * The user-supplied length in *@optlen is validated, clamped to
 * sizeof(int), written back, and that many bytes of the option value
 * are copied to @optval.  @level is not examined here.
 *
 * Returns 0 on success, -EFAULT on a user-memory access failure,
 * -EINVAL for a negative length and -ENOPROTOOPT for unknown options.
 */
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	struct udp_sock *up = udp_sk(sk);
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	/*
	 * Reject negative lengths before clamping: min_t(unsigned int, ...)
	 * turns a negative length into sizeof(int), so a check placed
	 * after the clamp can never fire.
	 */
	if (len < 0)
		return -EINVAL;

	len = min_t(unsigned int, len, sizeof(int));

	switch (optname) {
	case UDP_CORK:
		val = up->corkflag;
		break;

	case UDP_ENCAP:
		val = up->encap_type;
		break;

	/* The following two cannot be changed on UDP sockets, the return is
	 * always 0 (which corresponds to the full checksum coverage of UDP). */
	case UDPLITE_SEND_CSCOV:
		val = up->pcslen;
		break;

	case UDPLITE_RECV_CSCOV:
		val = up->pcrlen;
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;
	return 0;
}
2008-03-07 03:22:02 +03:00
/* getsockopt entry for UDP sockets: SOL_UDP / SOL_UDPLITE options are
 * handled by udp_lib_getsockopt(), every other level is passed on to
 * ip_getsockopt().
 */
int udp_getsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return ip_getsockopt(sk, level, optname, optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
/* Compat variant of udp_getsockopt(): identical for the UDP levels,
 * other levels go to compat_ip_getsockopt().
 */
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_UDP && level != SOL_UDPLITE)
		return compat_ip_getsockopt(sk, level, optname, optval, optlen);

	return udp_lib_getsockopt(sk, level, optname, optval, optlen);
}
#endif
2005-04-17 02:20:36 +04:00
/**
 *	udp_poll - wait for a UDP event.
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	This is same as datagram poll, except for the special case of
 *	blocking sockets. If application is using a blocking fd
 *	and a packet with checksum error is in the queue;
 *	then it could get return from select indicating data available
 *	but then block when reading it. Add special case code
 *	to work around these arguably broken applications.
 *
 *	Returns the datagram_poll() mask, with POLLIN | POLLRDNORM cleared
 *	when purging bad-checksum packets left the receive queue empty.
 */
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;
	int is_lite = IS_UDPLITE(sk);
	struct sk_buff_head *rcvq;
	struct sk_buff *skb;

	/* Only readable, blocking, not-shut-down sockets need the check. */
	if (!(mask & POLLRDNORM) ||
	    (file->f_flags & O_NONBLOCK) ||
	    (sk->sk_shutdown & RCV_SHUTDOWN))
		return mask;

	/* Check for false positives due to checksum errors */
	rcvq = &sk->sk_receive_queue;

	spin_lock_bh(&rcvq->lock);
	for (;;) {
		skb = skb_peek(rcvq);
		if (skb == NULL || !udp_lib_checksum_complete(skb))
			break;

		/* Head of queue is corrupt: account for it and discard. */
		UDP_INC_STATS_BH(sock_net(sk),
				 UDP_MIB_INERRORS, is_lite);
		__skb_unlink(skb, rcvq);
		kfree_skb(skb);
	}
	spin_unlock_bh(&rcvq->lock);

	/* nothing to see, move along */
	if (skb == NULL)
		mask &= ~(POLLIN | POLLRDNORM);

	return mask;
}
2008-03-07 03:22:02 +03:00
struct proto udp_prot = {
. name = " UDP " ,
. owner = THIS_MODULE ,
. close = udp_lib_close ,
. connect = ip4_datagram_connect ,
. disconnect = udp_disconnect ,
. ioctl = udp_ioctl ,
. destroy = udp_destroy_sock ,
. setsockopt = udp_setsockopt ,
. getsockopt = udp_getsockopt ,
. sendmsg = udp_sendmsg ,
. recvmsg = udp_recvmsg ,
. sendpage = udp_sendpage ,
. backlog_rcv = udp_queue_rcv_skb ,
. hash = udp_lib_hash ,
. unhash = udp_lib_unhash ,
. get_port = udp_v4_get_port ,
. memory_allocated = & udp_memory_allocated ,
. sysctl_mem = sysctl_udp_mem ,
. sysctl_wmem = & sysctl_udp_wmem_min ,
. sysctl_rmem = & sysctl_udp_rmem_min ,
. obj_size = sizeof ( struct udp_sock ) ,
2008-03-23 02:51:21 +03:00
. h . udp_hash = udp_hash ,
2008-03-07 03:22:02 +03:00
# ifdef CONFIG_COMPAT
. compat_setsockopt = compat_udp_setsockopt ,
. compat_getsockopt = compat_udp_getsockopt ,
# endif
} ;
2005-04-17 02:20:36 +04:00
/* ------------------------------------------------------------------------ */
# ifdef CONFIG_PROC_FS
static struct sock * udp_get_first ( struct seq_file * seq )
{
struct sock * sk ;
struct udp_iter_state * state = seq - > private ;
2008-03-29 04:23:33 +03:00
struct net * net = seq_file_net ( seq ) ;
2005-04-17 02:20:36 +04:00
for ( state - > bucket = 0 ; state - > bucket < UDP_HTABLE_SIZE ; + + state - > bucket ) {
struct hlist_node * node ;
2006-11-27 22:10:57 +03:00
sk_for_each ( sk , node , state - > hashtable + state - > bucket ) {
2008-03-25 21:57:35 +03:00
if ( ! net_eq ( sock_net ( sk ) , net ) )
2008-03-21 14:11:58 +03:00
continue ;
2005-04-17 02:20:36 +04:00
if ( sk - > sk_family = = state - > family )
goto found ;
}
}
sk = NULL ;
found :
return sk ;
}
static struct sock * udp_get_next ( struct seq_file * seq , struct sock * sk )
{
struct udp_iter_state * state = seq - > private ;
2008-03-29 04:23:33 +03:00
struct net * net = seq_file_net ( seq ) ;
2005-04-17 02:20:36 +04:00
do {
sk = sk_next ( sk ) ;
try_again :
;
2008-03-25 21:57:35 +03:00
} while ( sk & & ( ! net_eq ( sock_net ( sk ) , net ) | | sk - > sk_family ! = state - > family ) ) ;
2005-04-17 02:20:36 +04:00
if ( ! sk & & + + state - > bucket < UDP_HTABLE_SIZE ) {
2006-11-27 22:10:57 +03:00
sk = sk_head ( state - > hashtable + state - > bucket ) ;
2005-04-17 02:20:36 +04:00
goto try_again ;
}
return sk ;
}
/* Return the matching socket at zero-based position @pos, or NULL if
 * there are not that many.
 */
static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
{
	struct sock *sk = udp_get_first(seq);

	if (sk) {
		while (pos) {
			sk = udp_get_next(seq, sk);
			if (sk == NULL)
				break;
			--pos;
		}
	}

	return pos ? NULL : sk;
}
/* seq_file start: takes udp_hash_lock for reading and returns
 * SEQ_START_TOKEN at position 0, otherwise the socket at *pos - 1.
 */
static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(udp_hash_lock)
{
	read_lock(&udp_hash_lock);

	if (*pos == 0)
		return SEQ_START_TOKEN;

	return udp_get_idx(seq, *pos - 1);
}
/* seq_file next: the first socket after the start token, otherwise the
 * successor of @v; *pos is incremented in either case.
 */
static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *sk = (v == SEQ_START_TOKEN) ? udp_get_idx(seq, 0)
						 : udp_get_next(seq, v);

	++*pos;
	return sk;
}
static void udp_seq_stop ( struct seq_file * seq , void * v )
2008-01-02 08:58:02 +03:00
__releases ( udp_hash_lock )
2005-04-17 02:20:36 +04:00
{
read_unlock ( & udp_hash_lock ) ;
}
static int udp_seq_open ( struct inode * inode , struct file * file )
{
struct udp_seq_afinfo * afinfo = PDE ( inode ) - > data ;
2008-03-29 04:25:06 +03:00
struct udp_iter_state * s ;
int err ;
2008-03-21 14:11:58 +03:00
2008-03-29 04:25:06 +03:00
err = seq_open_net ( inode , file , & afinfo - > seq_ops ,
sizeof ( struct udp_iter_state ) ) ;
if ( err < 0 )
return err ;
2008-03-21 14:11:58 +03:00
2008-03-29 04:25:06 +03:00
s = ( ( struct seq_file * ) file - > private_data ) - > private ;
2005-04-17 02:20:36 +04:00
s - > family = afinfo - > family ;
2006-11-27 22:10:57 +03:00
s - > hashtable = afinfo - > hashtable ;
2008-03-29 04:25:06 +03:00
return err ;
2008-03-21 14:11:58 +03:00
}
2005-04-17 02:20:36 +04:00
/* ------------------------------------------------------------------------ */
2008-03-21 14:14:17 +03:00
int udp_proc_register ( struct net * net , struct udp_seq_afinfo * afinfo )
2005-04-17 02:20:36 +04:00
{
struct proc_dir_entry * p ;
int rc = 0 ;
2008-03-29 04:25:32 +03:00
afinfo - > seq_fops . open = udp_seq_open ;
afinfo - > seq_fops . read = seq_read ;
afinfo - > seq_fops . llseek = seq_lseek ;
afinfo - > seq_fops . release = seq_release_net ;
2005-04-17 02:20:36 +04:00
2008-03-29 04:24:26 +03:00
afinfo - > seq_ops . start = udp_seq_start ;
afinfo - > seq_ops . next = udp_seq_next ;
afinfo - > seq_ops . stop = udp_seq_stop ;
2008-05-02 15:10:08 +04:00
p = proc_create_data ( afinfo - > name , S_IRUGO , net - > proc_net ,
& afinfo - > seq_fops , afinfo ) ;
if ( ! p )
2005-04-17 02:20:36 +04:00
rc = - ENOMEM ;
return rc ;
}
2008-03-21 14:14:17 +03:00
void udp_proc_unregister ( struct net * net , struct udp_seq_afinfo * afinfo )
2005-04-17 02:20:36 +04:00
{
2008-03-21 14:14:17 +03:00
proc_net_remove ( net , afinfo - > name ) ;
2005-04-17 02:20:36 +04:00
}
2008-03-07 03:22:02 +03:00
/* ------------------------------------------------------------------------ */
2008-04-24 12:02:16 +04:00
static void udp4_format_sock ( struct sock * sp , struct seq_file * f ,
int bucket , int * len )
2008-03-07 03:22:02 +03:00
{
struct inet_sock * inet = inet_sk ( sp ) ;
__be32 dest = inet - > daddr ;
__be32 src = inet - > rcv_saddr ;
__u16 destp = ntohs ( inet - > dport ) ;
__u16 srcp = ntohs ( inet - > sport ) ;
2008-04-24 12:02:16 +04:00
seq_printf ( f , " %4d: %08X:%04X %08X:%04X "
2008-06-18 08:04:56 +04:00
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n " ,
2008-03-07 03:22:02 +03:00
bucket , src , srcp , dest , destp , sp - > sk_state ,
atomic_read ( & sp - > sk_wmem_alloc ) ,
atomic_read ( & sp - > sk_rmem_alloc ) ,
0 , 0L , 0 , sock_i_uid ( sp ) , 0 , sock_i_ino ( sp ) ,
2008-06-18 08:04:56 +04:00
atomic_read ( & sp - > sk_refcnt ) , sp ,
atomic_read ( & sp - > sk_drops ) , len ) ;
2008-03-07 03:22:02 +03:00
}
int udp4_seq_show ( struct seq_file * seq , void * v )
{
if ( v = = SEQ_START_TOKEN )
seq_printf ( seq , " %-127s \n " ,
" sl local_address rem_address st tx_queue "
" rx_queue tr tm->when retrnsmt uid timeout "
2008-06-18 08:04:56 +04:00
" inode ref pointer drops " ) ;
2008-03-07 03:22:02 +03:00
else {
struct udp_iter_state * state = seq - > private ;
2008-04-24 12:02:16 +04:00
int len ;
2008-03-07 03:22:02 +03:00
2008-04-24 12:02:16 +04:00
udp4_format_sock ( v , seq , state - > bucket , & len ) ;
seq_printf ( seq , " %*s \n " , 127 - len , " " ) ;
2008-03-07 03:22:02 +03:00
}
return 0 ;
}
/* ------------------------------------------------------------------------ */
static struct udp_seq_afinfo udp4_seq_afinfo = {
. name = " udp " ,
. family = AF_INET ,
. hashtable = udp_hash ,
2008-03-29 04:25:53 +03:00
. seq_fops = {
. owner = THIS_MODULE ,
} ,
2008-03-29 04:24:26 +03:00
. seq_ops = {
. show = udp4_seq_show ,
} ,
2008-03-07 03:22:02 +03:00
} ;
2008-03-25 00:53:49 +03:00
/* Per-namespace init: register the IPv4 UDP proc file in @net. */
static int udp4_proc_init_net(struct net *net)
{
	return udp_proc_register(net, &udp4_seq_afinfo);
}
/* Per-namespace exit: remove the IPv4 UDP proc file from @net. */
static void udp4_proc_exit_net(struct net *net)
{
	udp_proc_unregister(net, &udp4_seq_afinfo);
}
/* Per-namespace hooks that create and remove the IPv4 UDP proc file. */
static struct pernet_operations udp4_net_ops = {
	.init	= udp4_proc_init_net,
	.exit	= udp4_proc_exit_net,
};
2008-03-07 03:22:02 +03:00
/* Register the per-namespace proc hooks; returns the result of
 * register_pernet_subsys().
 */
int __init udp4_proc_init(void)
{
	return register_pernet_subsys(&udp4_net_ops);
}
void udp4_proc_exit ( void )
{
2008-03-25 00:53:49 +03:00
unregister_pernet_subsys ( & udp4_net_ops ) ;
2008-03-07 03:22:02 +03:00
}
2005-04-17 02:20:36 +04:00
# endif /* CONFIG_PROC_FS */
2007-12-31 11:29:24 +03:00
/* Boot-time setup of the UDP memory sysctls: derives the three
 * sysctl_udp_mem thresholds from nr_all_pages and sets the minimum
 * receive/send buffer sizes to SK_MEM_QUANTUM.
 */
void __init udp_init(void)
{
	unsigned long limit;

	/* Set the pressure threshold up by the same strategy of TCP. It is a
	 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
	 * toward zero with the amount of memory, with a floor of 128 pages.
	 */
	limit = min(nr_all_pages, 1UL << (28 - PAGE_SHIFT));
	limit >>= 20 - PAGE_SHIFT;
	limit *= nr_all_pages >> (20 - PAGE_SHIFT);
	limit >>= PAGE_SHIFT - 11;
	limit = max(limit, 128UL);

	sysctl_udp_mem[0] = limit / 4 * 3;
	sysctl_udp_mem[1] = limit;
	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;

	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
}
2005-04-17 02:20:36 +04:00
/* Symbols exported to other kernel code. */

/* hash table and its lock */
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);

/* protocol table and socket operations */
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_poll);

/* helpers exported under the udp_lib_ prefix */
EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_lib_get_port);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);
EXPORT_SYMBOL(udp_proc_unregister);
#endif