2005-04-17 02:20:36 +04:00
/*
* NET3 : Implementation of the ICMP protocol layer .
*
* Alan Cox , < alan @ redhat . com >
*
* Version : $ Id : icmp . c , v 1.85 2002 / 02 / 01 22 : 01 : 03 davem Exp $
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*
* Some of the function names and the icmp unreach table for this
* module were derived from [ icmp . c 1.0 .11 06 / 02 / 93 ] by
* Ross Biro , Fred N . van Kempen , Mark Evans , Alan Cox , Gerhard Koerting .
* Other than that this module is a complete rewrite .
*
* Fixes :
* Clemens Fruhwirth : introduce global icmp rate limiting
* with icmp type masking ability instead
* of broken per type icmp timeouts .
* Mike Shaver : RFC1122 checks .
* Alan Cox : Multicast ping reply as self .
* Alan Cox : Fix atomicity lockup in ip_build_xmit
* call .
* Alan Cox : Added 216 , 128 byte paths to the MTU
* code .
* Martin Mares : RFC1812 checks .
* Martin Mares : Can be configured to follow redirects
* if acting as a router _without_ a
* routing protocol ( RFC 1812 ) .
* Martin Mares : Echo requests may be configured to
* be ignored ( RFC 1812 ) .
* Martin Mares : Limitation of ICMP error message
* transmit rate ( RFC 1812 ) .
* Martin Mares : TOS and Precedence set correctly
* ( RFC 1812 ) .
* Martin Mares : Now copying as much data from the
* original packet as we can without
* exceeding 576 bytes ( RFC 1812 ) .
* Willy Konynenberg : Transparent proxying support .
* Keith Owens : RFC1191 correction for 4.2 BSD based
* path MTU bug .
* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
* valid ( RFC 1812 ) .
* Andi Kleen : Check all packet lengths properly
* and moved all kfree_skb ( ) up to
* icmp_rcv .
* Andi Kleen : Move the rate limit bookkeeping
* into the dest entry and use a token
* bucket filter ( thanks to ANK ) . Make
* the rates sysctl configurable .
* Yu Tianli : Fixed two ugly bugs in icmp_send
* - IP option length was accounted wrongly
* - ICMP header length was not accounted
* at all .
* Tristan Greaves : Added sysctl option to ignore bogus
* broadcast responses from broken routers .
*
* To Fix :
*
* - Should use skb_pull ( ) instead of all the manual checking .
* This would also greatly simplify some upper layer error handlers . - - AK
*
*/
# include <linux/module.h>
# include <linux/types.h>
# include <linux/jiffies.h>
# include <linux/kernel.h>
# include <linux/fcntl.h>
# include <linux/socket.h>
# include <linux/in.h>
# include <linux/inet.h>
2005-12-27 07:43:12 +03:00
# include <linux/inetdevice.h>
2005-04-17 02:20:36 +04:00
# include <linux/netdevice.h>
# include <linux/string.h>
# include <linux/netfilter_ipv4.h>
# include <net/snmp.h>
# include <net/ip.h>
# include <net/route.h>
# include <net/protocol.h>
# include <net/icmp.h>
# include <net/tcp.h>
# include <net/udp.h>
# include <net/raw.h>
# include <linux/skbuff.h>
# include <net/sock.h>
# include <linux/errno.h>
# include <linux/timer.h>
# include <linux/init.h>
# include <asm/system.h>
# include <asm/uaccess.h>
# include <net/checksum.h>
2007-12-12 21:44:43 +03:00
# include <net/xfrm.h>
2005-04-17 02:20:36 +04:00
/*
* Build xmit assembly blocks
*/
struct icmp_bxm {
struct sk_buff * skb ;
int offset ;
int data_len ;
struct {
struct icmphdr icmph ;
2006-09-28 05:38:30 +04:00
__be32 times [ 3 ] ;
2005-04-17 02:20:36 +04:00
} data ;
int head_len ;
struct ip_options replyopts ;
unsigned char optbuf [ 40 ] ;
} ;
/*
* Statistics
*/
2005-08-26 23:05:31 +04:00
/* ICMP counter block; exported at the end of this file via EXPORT_SYMBOL. */
DEFINE_SNMP_STAT ( struct icmp_mib , icmp_statistics ) __read_mostly ;
2007-09-17 20:57:33 +04:00
/* NOTE(review): presumably the per-message-type counters behind ICMPMSG*_INC_STATS — confirm. */
DEFINE_SNMP_STAT ( struct icmpmsg_mib , icmpmsg_statistics ) __read_mostly ;
2005-04-17 02:20:36 +04:00
/* An array of errno for error messages from dest unreach. */
/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
/*
 * One entry per destination-unreachable code, in code order: the errno
 * associated with the code and whether it is flagged as fatal.
 */
struct icmp_err icmp_err_convert[] = {
	{ .errno = ENETUNREACH,  .fatal = 0 },	/* ICMP_NET_UNREACH */
	{ .errno = EHOSTUNREACH, .fatal = 0 },	/* ICMP_HOST_UNREACH */
	{ .errno = ENOPROTOOPT,  .fatal = 1 },	/* ICMP_PROT_UNREACH */
	{ .errno = ECONNREFUSED, .fatal = 1 },	/* ICMP_PORT_UNREACH */
	{ .errno = EMSGSIZE,     .fatal = 0 },	/* ICMP_FRAG_NEEDED */
	{ .errno = EOPNOTSUPP,   .fatal = 0 },	/* ICMP_SR_FAILED */
	{ .errno = ENETUNREACH,  .fatal = 1 },	/* ICMP_NET_UNKNOWN */
	{ .errno = EHOSTDOWN,    .fatal = 1 },	/* ICMP_HOST_UNKNOWN */
	{ .errno = ENONET,       .fatal = 1 },	/* ICMP_HOST_ISOLATED */
	{ .errno = ENETUNREACH,  .fatal = 1 },	/* ICMP_NET_ANO */
	{ .errno = EHOSTUNREACH, .fatal = 1 },	/* ICMP_HOST_ANO */
	{ .errno = ENETUNREACH,  .fatal = 0 },	/* ICMP_NET_UNR_TOS */
	{ .errno = EHOSTUNREACH, .fatal = 0 },	/* ICMP_HOST_UNR_TOS */
	{ .errno = EHOSTUNREACH, .fatal = 1 },	/* ICMP_PKT_FILTERED */
	{ .errno = EHOSTUNREACH, .fatal = 1 },	/* ICMP_PREC_VIOLATION */
	{ .errno = EHOSTUNREACH, .fatal = 1 },	/* ICMP_PREC_CUTOFF */
};
/* Control parameters for ECHO replies. */
2006-09-23 01:15:41 +04:00
/* Non-zero: icmp_echo() sends no reply at all. */
int sysctl_icmp_echo_ignore_all __read_mostly ;
/*
 * Non-zero (the default): icmp_rcv() counts as errors and drops echo and
 * timestamp requests whose route is flagged broadcast or multicast.
 */
int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1 ;
2005-04-17 02:20:36 +04:00
/* Control parameter - ignore bogus broadcast responses? */
2006-09-23 01:15:41 +04:00
/*
 * Zero: icmp_unreach() logs (rate limited) and drops errors whose embedded
 * destination address is a broadcast address.  Non-zero (the default): that
 * check is skipped.
 */
int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1 ;
2005-04-17 02:20:36 +04:00
/*
* Configurable global rate limit .
*
* ratelimit defines tokens / packet consumed for dst - > rate_token bucket
* ratemask defines which icmp types are ratelimited : a type is
* limited when the bit at its position ( 1 < < type ) is set .
*
* default :
* dest unreachable ( 3 ) , source quench ( 4 ) ,
* time exceeded ( 11 ) , parameter problem ( 12 )
*/
2006-09-23 01:15:41 +04:00
/* Passed as the 'timeout' (token cost per packet) to xrlim_allow(). */
int sysctl_icmp_ratelimit __read_mostly = 1 * HZ ;
/* Bit (1 << type) set means that ICMP type goes through xrlim_allow(). */
int sysctl_icmp_ratemask __read_mostly = 0x1818 ;
/*
 * Non-zero: for non-local routes icmp_send() picks the source address from
 * the device the offending packet arrived on (rt->fl.iif).
 */
int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
* ICMP control array . This specifies what to do with each ICMP .
*/
/* Per-type dispatch entry; icmp_pointers[] holds one for each ICMP type. */
struct icmp_control {
/* Called by icmp_rcv() with the ICMP header already pulled off the skb. */
void ( * handler ) ( struct sk_buff * skb ) ;
short error ; /* This ICMP is classed as an error message */
} ;
2005-11-30 03:21:38 +03:00
/* Forward declaration: the table is defined after the handlers it references. */
static const struct icmp_control icmp_pointers [ NR_ICMP_TYPES + 1 ] ;
2005-04-17 02:20:36 +04:00
/*
* The ICMP socket ( s ) . This is the most convenient way to flow control
* our ICMP output as well as maintain a clean interface throughout
* all layers . All Socketless IP sends will soon be gone .
*
* On SMP we have one ICMP socket per - cpu .
*/
2008-02-29 22:16:08 +03:00
/* Per-cpu ICMP socket, created in icmp_init() and released in icmp_exit(). */
static DEFINE_PER_CPU ( struct sock * , __icmp_sk ) = NULL ;
/* Socket of the CPU the caller is currently running on. */
# define icmp_sk __get_cpu_var(__icmp_sk)
2005-04-17 02:20:36 +04:00
2008-02-29 22:16:46 +03:00
static inline int icmp_xmit_lock ( struct sock * sk )
2005-04-17 02:20:36 +04:00
{
local_bh_disable ( ) ;
2008-02-29 22:16:46 +03:00
if ( unlikely ( ! spin_trylock ( & sk - > sk_lock . slock ) ) ) {
2005-04-17 02:20:36 +04:00
/* This can happen if the output path signals a
* dst_link_failure ( ) for an outgoing ICMP packet .
*/
local_bh_enable ( ) ;
return 1 ;
}
return 0 ;
}
2008-02-29 22:16:46 +03:00
static inline void icmp_xmit_unlock ( struct sock * sk )
2005-04-17 02:20:36 +04:00
{
2008-02-29 22:16:46 +03:00
spin_unlock_bh ( & sk - > sk_lock . slock ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Send an ICMP frame .
*/
/*
* Check transmit rate limitation for given message .
* The rate information is held in the destination cache now .
* This function is generic and could be used for other purposes
* too . It uses a Token bucket filter as suggested by Alexey Kuznetsov .
*
* Note that the same dst_entry fields are modified by functions in
* route . c too , but these work for packet destinations while xrlim_allow
* works for icmp destinations . This means the rate limiting information
* for one " ip object " is shared - and these ICMPs are twice limited :
* by source and by destination .
*
* RFC 1812 : 4.3 .2 .8 SHOULD be able to limit error message rate
* SHOULD allow setting of rate limits
*
* Shared between ICMPv4 and ICMPv6 .
*/
# define XRLIM_BURST_FACTOR 6
int xrlim_allow ( struct dst_entry * dst , int timeout )
{
2008-01-22 17:18:34 +03:00
unsigned long now , token = dst - > rate_tokens ;
2005-04-17 02:20:36 +04:00
int rc = 0 ;
now = jiffies ;
2008-01-22 17:18:34 +03:00
token + = now - dst - > rate_last ;
2005-04-17 02:20:36 +04:00
dst - > rate_last = now ;
2008-01-22 17:18:34 +03:00
if ( token > XRLIM_BURST_FACTOR * timeout )
token = XRLIM_BURST_FACTOR * timeout ;
if ( token > = timeout ) {
token - = timeout ;
2005-04-17 02:20:36 +04:00
rc = 1 ;
}
2008-01-22 17:18:34 +03:00
dst - > rate_tokens = token ;
2005-04-17 02:20:36 +04:00
return rc ;
}
static inline int icmpv4_xrlim_allow ( struct rtable * rt , int type , int code )
{
struct dst_entry * dst = & rt - > u . dst ;
int rc = 1 ;
if ( type > NR_ICMP_TYPES )
goto out ;
/* Don't limit PMTU discovery. */
if ( type = = ICMP_DEST_UNREACH & & code = = ICMP_FRAG_NEEDED )
goto out ;
/* No rate limit on loopback */
if ( dst - > dev & & ( dst - > dev - > flags & IFF_LOOPBACK ) )
2007-02-09 17:24:47 +03:00
goto out ;
2005-04-17 02:20:36 +04:00
/* Limit if icmp type is enabled in ratemask. */
if ( ( 1 < < type ) & sysctl_icmp_ratemask )
rc = xrlim_allow ( dst , sysctl_icmp_ratelimit ) ;
out :
return rc ;
}
/*
* Maintain the counters used in the SNMP statistics for outgoing ICMP
*/
2007-09-17 20:57:33 +04:00
void icmp_out_count ( unsigned char type )
2005-04-17 02:20:36 +04:00
{
2007-09-17 20:57:33 +04:00
ICMPMSGOUT_INC_STATS ( type ) ;
ICMP_INC_STATS ( ICMP_MIB_OUTMSGS ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Checksum each fragment , and on the first include the headers and final
* checksum .
*/
static int icmp_glue_bits ( void * from , char * to , int offset , int len , int odd ,
struct sk_buff * skb )
{
struct icmp_bxm * icmp_param = ( struct icmp_bxm * ) from ;
2006-11-15 08:36:54 +03:00
__wsum csum ;
2005-04-17 02:20:36 +04:00
csum = skb_copy_and_csum_bits ( icmp_param - > skb ,
icmp_param - > offset + offset ,
to , len , 0 ) ;
skb - > csum = csum_block_add ( skb - > csum , csum , odd ) ;
if ( icmp_pointers [ icmp_param - > data . icmph . type ] . error )
nf_ct_attach ( skb , icmp_param - > skb ) ;
return 0 ;
}
static void icmp_push_reply ( struct icmp_bxm * icmp_param ,
struct ipcm_cookie * ipc , struct rtable * rt )
{
2008-02-29 22:15:42 +03:00
struct sock * sk ;
2005-04-17 02:20:36 +04:00
struct sk_buff * skb ;
2008-02-29 22:16:08 +03:00
sk = icmp_sk ;
2008-02-29 22:15:42 +03:00
if ( ip_append_data ( sk , icmp_glue_bits , icmp_param ,
2007-02-09 17:24:47 +03:00
icmp_param - > data_len + icmp_param - > head_len ,
icmp_param - > head_len ,
ipc , rt , MSG_DONTWAIT ) < 0 )
2008-02-29 22:15:42 +03:00
ip_flush_pending_frames ( sk ) ;
else if ( ( skb = skb_peek ( & sk - > sk_write_queue ) ) ! = NULL ) {
2007-03-13 20:43:18 +03:00
struct icmphdr * icmph = icmp_hdr ( skb ) ;
2006-11-15 08:24:49 +03:00
__wsum csum = 0 ;
2005-04-17 02:20:36 +04:00
struct sk_buff * skb1 ;
2008-02-29 22:15:42 +03:00
skb_queue_walk ( & sk - > sk_write_queue , skb1 ) {
2005-04-17 02:20:36 +04:00
csum = csum_add ( csum , skb1 - > csum ) ;
}
csum = csum_partial_copy_nocheck ( ( void * ) & icmp_param - > data ,
( char * ) icmph ,
icmp_param - > head_len , csum ) ;
icmph - > checksum = csum_fold ( csum ) ;
skb - > ip_summed = CHECKSUM_NONE ;
2008-02-29 22:15:42 +03:00
ip_push_pending_frames ( sk ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
* Driving logic for building and sending ICMP messages .
*/
static void icmp_reply ( struct icmp_bxm * icmp_param , struct sk_buff * skb )
{
2008-02-29 22:16:08 +03:00
struct sock * sk = icmp_sk ;
2005-04-17 02:20:36 +04:00
struct inet_sock * inet = inet_sk ( sk ) ;
struct ipcm_cookie ipc ;
struct rtable * rt = ( struct rtable * ) skb - > dst ;
2006-09-28 05:28:07 +04:00
__be32 daddr ;
2005-04-17 02:20:36 +04:00
if ( ip_options_echo ( & icmp_param - > replyopts , skb ) )
2006-02-03 04:03:18 +03:00
return ;
2005-04-17 02:20:36 +04:00
2008-02-29 22:16:46 +03:00
if ( icmp_xmit_lock ( sk ) )
2005-04-17 02:20:36 +04:00
return ;
icmp_param - > data . icmph . checksum = 0 ;
2007-04-21 09:47:35 +04:00
inet - > tos = ip_hdr ( skb ) - > tos ;
2005-04-17 02:20:36 +04:00
daddr = ipc . addr = rt - > rt_src ;
ipc . opt = NULL ;
if ( icmp_param - > replyopts . optlen ) {
ipc . opt = & icmp_param - > replyopts ;
if ( ipc . opt - > srr )
daddr = icmp_param - > replyopts . faddr ;
}
{
struct flowi fl = { . nl_u = { . ip4_u =
{ . daddr = daddr ,
. saddr = rt - > rt_spec_dst ,
2007-04-21 09:47:35 +04:00
. tos = RT_TOS ( ip_hdr ( skb ) - > tos ) } } ,
2005-04-17 02:20:36 +04:00
. proto = IPPROTO_ICMP } ;
2006-08-05 10:12:42 +04:00
security_skb_classify_flow ( skb , & fl ) ;
2008-01-23 10:50:57 +03:00
if ( ip_route_output_key ( rt - > u . dst . dev - > nd_net , & rt , & fl ) )
2005-04-17 02:20:36 +04:00
goto out_unlock ;
}
if ( icmpv4_xrlim_allow ( rt , icmp_param - > data . icmph . type ,
icmp_param - > data . icmph . code ) )
icmp_push_reply ( icmp_param , & ipc , rt ) ;
ip_rt_put ( rt ) ;
out_unlock :
2008-02-29 22:16:46 +03:00
icmp_xmit_unlock ( sk ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Send an ICMP message in response to a situation
*
* RFC 1122 : 3.2 .2 MUST send at least the IP header and 8 bytes of header .
* MAY send more ( we do ) .
* MUST NOT change this header information .
* MUST NOT reply to a multicast / broadcast IP address .
* MUST NOT reply to a multicast / broadcast MAC address .
* MUST reply to only the first fragment .
*/
2006-09-27 09:17:28 +04:00
/*
 * icmp_send - emit an ICMP message in response to @skb_in.
 *
 * @skb_in: the packet that triggered the message; must carry a route
 * @type:   ICMP type to send (indexes icmp_pointers[])
 * @code:   ICMP code to send
 * @info:   value stored in the header's 'un.gateway' word
 *
 * Nothing is sent when the packet has no route, has a bogus network
 * header, was not addressed to this host at link level, came in via a
 * broadcast/multicast route, is a non-first fragment, or when an error
 * would be generated in response to another ICMP error.
 */
void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
	struct iphdr *iph;
	int room;
	struct icmp_bxm icmp_param;
	struct rtable *rt = (struct rtable *)skb_in->dst;
	struct ipcm_cookie ipc;
	__be32 saddr;
	u8 tos;
	struct net *net;
	struct sock *sk;

	if (!rt)
		goto out;
	net = rt->u.dst.dev->nd_net;

	/*
	 *	Find the original header. It is expected to be valid, of course.
	 *	Check this, icmp_send is called from the most obscure devices
	 *	sometimes.
	 */
	iph = ip_hdr(skb_in);

	if ((u8 *)iph < skb_in->head ||
	    (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
		goto out;

	/*
	 *	No replies to physical multicast/broadcast
	 */
	if (skb_in->pkt_type != PACKET_HOST)
		goto out;

	/*
	 *	Now check at the protocol level
	 */
	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto out;

	/*
	 *	Only reply to fragment 0. We byte re-order the constant
	 *	mask for efficiency.
	 */
	if (iph->frag_off & htons(IP_OFFSET))
		goto out;

	/*
	 *	If we send an ICMP error to an ICMP error a mess would result..
	 */
	if (icmp_pointers[type].error) {
		/*
		 *	We are an error, check if we are replying to an
		 *	ICMP error
		 */
		if (iph->protocol == IPPROTO_ICMP) {
			u8 _inner_type, *itp;

			itp = skb_header_pointer(skb_in,
						 skb_network_header(skb_in) +
						 (iph->ihl << 2) +
						 offsetof(struct icmphdr,
							  type) -
						 skb_in->data,
						 sizeof(_inner_type),
						 &_inner_type);
			if (itp == NULL)
				goto out;

			/*
			 *	Assume any unknown ICMP type is an error. This
			 *	isn't specified by the RFC, but think about it..
			 */
			if (*itp > NR_ICMP_TYPES ||
			    icmp_pointers[*itp].error)
				goto out;
		}
	}

	/*
	 * icmp_sk is a per-cpu lookup.  Pin ourselves to this CPU *before*
	 * picking the socket: otherwise a preemptible caller could migrate
	 * between the lookup and the lock, and icmp_push_reply() (which
	 * looks icmp_sk up again) would use a socket we do not hold.
	 */
	local_bh_disable();
	sk = icmp_sk;

	if (icmp_xmit_lock(sk))
		goto out_bh_enable;

	/*
	 *	Construct source address and options.
	 */
	saddr = iph->daddr;
	if (!(rt->rt_flags & RTCF_LOCAL)) {
		struct net_device *dev = NULL;

		if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
			dev = dev_get_by_index(net, rt->fl.iif);

		if (dev) {
			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
			dev_put(dev);
		} else
			saddr = 0;
	}

	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
					   IPTOS_PREC_INTERNETCONTROL) :
					  iph->tos;

	/* rt is still the borrowed skb_in->dst here: nothing to release. */
	if (ip_options_echo(&icmp_param.replyopts, skb_in))
		goto out_unlock;

	/*
	 *	Prepare data for ICMP header.
	 */
	icmp_param.data.icmph.type	 = type;
	icmp_param.data.icmph.code	 = code;
	icmp_param.data.icmph.un.gateway = info;
	icmp_param.data.icmph.checksum	 = 0;
	icmp_param.skb	  = skb_in;
	icmp_param.offset = skb_network_offset(skb_in);
	inet_sk(sk)->tos = tos;
	ipc.addr = iph->saddr;
	ipc.opt = &icmp_param.replyopts;

	{
		struct flowi fl = {
			.nl_u = {
				.ip4_u = {
					.daddr = icmp_param.replyopts.srr ?
						icmp_param.replyopts.faddr :
						iph->saddr,
					.saddr = saddr,
					.tos = RT_TOS(tos)
				}
			},
			.proto = IPPROTO_ICMP,
			.uli_u = {
				.icmpt = {
					.type = type,
					.code = code
				}
			}
		};
		int err;
		struct rtable *rt2;

		security_skb_classify_flow(skb_in, &fl);
		/* From here on rt holds a reference of its own. */
		if (__ip_route_output_key(net, &rt, &fl))
			goto out_unlock;

		/* No need to clone since we're just using its address. */
		rt2 = rt;

		err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
		switch (err) {
		case 0:
			if (rt != rt2)
				goto route_done;
			break;
		case -EPERM:
			rt = NULL;
			break;
		default:
			goto out_unlock;
		}

		/*
		 * Relookup with the reversed flow.  Any failure on this path
		 * must fall back to the verdict of the first lookup: send via
		 * rt if it was accepted, drop otherwise.  Bailing out to
		 * out_unlock directly would leak the reference held in rt.
		 */
		if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET))
			goto relookup_failed;

		if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL)
			err = __ip_route_output_key(net, &rt2, &fl);
		else {
			struct flowi fl2 = {};
			struct dst_entry *odst;

			fl2.fl4_dst = fl.fl4_src;
			if (ip_route_output_key(net, &rt2, &fl2))
				goto relookup_failed;

			/* Ugh! */
			odst = skb_in->dst;
			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
					     RT_TOS(tos), rt2->u.dst.dev);

			dst_release(&rt2->u.dst);
			rt2 = (struct rtable *)skb_in->dst;
			skb_in->dst = odst;
		}

		if (err)
			goto relookup_failed;

		err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL,
				  XFRM_LOOKUP_ICMP);
		switch (err) {
		case 0:
			/* The relookup route replaces the original one. */
			dst_release(&rt->u.dst);
			rt = rt2;
			break;
		case -EPERM:
			/* Policy forbids it: drop, releasing rt if we have one. */
			goto ende;
		default:
relookup_failed:
			if (!rt)
				goto out_unlock;
			break;
		}
	}

route_done:
	if (!icmpv4_xrlim_allow(rt, type, code))
		goto ende;

	/* RFC says return as much as we can without exceeding 576 bytes. */

	room = dst_mtu(&rt->u.dst);
	if (room > 576)
		room = 576;
	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
	room -= sizeof(struct icmphdr);

	icmp_param.data_len = skb_in->len - icmp_param.offset;
	if (icmp_param.data_len > room)
		icmp_param.data_len = room;
	icmp_param.head_len = sizeof(struct icmphdr);

	icmp_push_reply(&icmp_param, &ipc, rt);
ende:
	ip_rt_put(rt);
out_unlock:
	icmp_xmit_unlock(sk);
out_bh_enable:
	/* Pairs with the local_bh_disable() taken before the icmp_sk lookup. */
	local_bh_enable();
out:;
}
/*
* Handle ICMP_DEST_UNREACH , ICMP_TIME_EXCEED , and ICMP_QUENCH .
*/
static void icmp_unreach ( struct sk_buff * skb )
{
struct iphdr * iph ;
struct icmphdr * icmph ;
int hash , protocol ;
struct net_protocol * ipprot ;
u32 info = 0 ;
2008-01-23 10:50:57 +03:00
struct net * net ;
net = skb - > dst - > dev - > nd_net ;
2005-04-17 02:20:36 +04:00
/*
* Incomplete header ?
* Only checks for the IP header , there should be an
* additional check for longer headers in upper levels .
*/
if ( ! pskb_may_pull ( skb , sizeof ( struct iphdr ) ) )
goto out_err ;
2007-03-13 20:43:18 +03:00
icmph = icmp_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
iph = ( struct iphdr * ) skb - > data ;
if ( iph - > ihl < 5 ) /* Mangled header, drop. */
goto out_err ;
if ( icmph - > type = = ICMP_DEST_UNREACH ) {
switch ( icmph - > code & 15 ) {
case ICMP_NET_UNREACH :
case ICMP_HOST_UNREACH :
case ICMP_PROT_UNREACH :
case ICMP_PORT_UNREACH :
break ;
case ICMP_FRAG_NEEDED :
if ( ipv4_config . no_pmtu_disc ) {
2005-08-10 07:50:53 +04:00
LIMIT_NETDEBUG ( KERN_INFO " ICMP: %u.%u.%u.%u: "
2005-04-17 02:20:36 +04:00
" fragmentation needed "
" and DF set. \n " ,
2005-08-10 07:50:53 +04:00
NIPQUAD ( iph - > daddr ) ) ;
2005-04-17 02:20:36 +04:00
} else {
2008-01-23 10:50:57 +03:00
info = ip_rt_frag_needed ( net , iph ,
2005-04-17 02:20:36 +04:00
ntohs ( icmph - > un . frag . mtu ) ) ;
if ( ! info )
goto out ;
}
break ;
case ICMP_SR_FAILED :
2005-08-10 07:50:53 +04:00
LIMIT_NETDEBUG ( KERN_INFO " ICMP: %u.%u.%u.%u: Source "
2005-04-17 02:20:36 +04:00
" Route Failed. \n " ,
2005-08-10 07:50:53 +04:00
NIPQUAD ( iph - > daddr ) ) ;
2005-04-17 02:20:36 +04:00
break ;
default :
break ;
}
if ( icmph - > code > NR_ICMP_UNREACH )
goto out ;
} else if ( icmph - > type = = ICMP_PARAMETERPROB )
info = ntohl ( icmph - > un . gateway ) > > 24 ;
/*
* Throw it at our lower layers
*
* RFC 1122 : 3.2 .2 MUST extract the protocol ID from the passed
* header .
* RFC 1122 : 3.2 .2 .1 MUST pass ICMP unreach messages to the
* transport layer .
* RFC 1122 : 3.2 .2 .2 MUST pass ICMP time expired messages to
* transport layer .
*/
/*
* Check the other end isnt violating RFC 1122. Some routers send
* bogus responses to broadcast frames . If you see this message
* first check your netmask matches at both ends , if it does then
* get the other vendor to fix their kit .
*/
if ( ! sysctl_icmp_ignore_bogus_error_responses & &
2008-01-23 10:50:57 +03:00
inet_addr_type ( net , iph - > daddr ) = = RTN_BROADCAST ) {
2005-04-17 02:20:36 +04:00
if ( net_ratelimit ( ) )
printk ( KERN_WARNING " %u.%u.%u.%u sent an invalid ICMP "
" type %u, code %u "
" error to a broadcast: %u.%u.%u.%u on %s \n " ,
2007-04-21 09:47:35 +04:00
NIPQUAD ( ip_hdr ( skb ) - > saddr ) ,
2005-04-17 02:20:36 +04:00
icmph - > type , icmph - > code ,
NIPQUAD ( iph - > daddr ) ,
skb - > dev - > name ) ;
goto out ;
}
/* Checkin full IP header plus 8 bytes of protocol to
* avoid additional coding at protocol handlers .
*/
if ( ! pskb_may_pull ( skb , iph - > ihl * 4 + 8 ) )
goto out ;
iph = ( struct iphdr * ) skb - > data ;
protocol = iph - > protocol ;
/*
* Deliver ICMP message to raw sockets . Pretty useless feature ?
*/
2007-11-20 09:35:07 +03:00
raw_icmp_error ( skb , protocol , info ) ;
2005-04-17 02:20:36 +04:00
hash = protocol & ( MAX_INET_PROTOS - 1 ) ;
rcu_read_lock ( ) ;
ipprot = rcu_dereference ( inet_protos [ hash ] ) ;
if ( ipprot & & ipprot - > err_handler )
ipprot - > err_handler ( skb , info ) ;
rcu_read_unlock ( ) ;
out :
return ;
out_err :
ICMP_INC_STATS_BH ( ICMP_MIB_INERRORS ) ;
goto out ;
}
/*
* Handle ICMP_REDIRECT .
*/
static void icmp_redirect ( struct sk_buff * skb )
{
struct iphdr * iph ;
if ( skb - > len < sizeof ( struct iphdr ) )
goto out_err ;
/*
* Get the copied header of the packet that caused the redirect
*/
if ( ! pskb_may_pull ( skb , sizeof ( struct iphdr ) ) )
goto out ;
iph = ( struct iphdr * ) skb - > data ;
2007-03-13 20:43:18 +03:00
switch ( icmp_hdr ( skb ) - > code & 7 ) {
2005-04-17 02:20:36 +04:00
case ICMP_REDIR_NET :
case ICMP_REDIR_NETTOS :
/*
* As per RFC recommendations now handle it as a host redirect .
*/
case ICMP_REDIR_HOST :
case ICMP_REDIR_HOSTTOS :
2007-04-21 09:47:35 +04:00
ip_rt_redirect ( ip_hdr ( skb ) - > saddr , iph - > daddr ,
2007-03-13 20:43:18 +03:00
icmp_hdr ( skb ) - > un . gateway ,
2006-03-25 12:38:55 +03:00
iph - > saddr , skb - > dev ) ;
2005-04-17 02:20:36 +04:00
break ;
2007-02-09 17:24:47 +03:00
}
2005-04-17 02:20:36 +04:00
out :
return ;
out_err :
ICMP_INC_STATS_BH ( ICMP_MIB_INERRORS ) ;
goto out ;
}
/*
* Handle ICMP_ECHO ( " ping " ) requests .
*
* RFC 1122 : 3.2 .2 .6 MUST have an echo server that answers ICMP echo
* requests .
* RFC 1122 : 3.2 .2 .6 Data received in the ICMP_ECHO request MUST be
* included in the reply .
* RFC 1812 : 4.3 .3 .6 SHOULD have a config option for silently ignoring
* echo requests , MUST have default = NOT .
* See also WRT handling of options once they are done and working .
*/
static void icmp_echo ( struct sk_buff * skb )
{
if ( ! sysctl_icmp_echo_ignore_all ) {
struct icmp_bxm icmp_param ;
2007-03-13 20:43:18 +03:00
icmp_param . data . icmph = * icmp_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
icmp_param . data . icmph . type = ICMP_ECHOREPLY ;
icmp_param . skb = skb ;
icmp_param . offset = 0 ;
icmp_param . data_len = skb - > len ;
icmp_param . head_len = sizeof ( struct icmphdr ) ;
icmp_reply ( & icmp_param , skb ) ;
}
}
/*
* Handle ICMP Timestamp requests .
* RFC 1122 : 3.2 .2 .8 MAY implement ICMP timestamp requests .
* SHOULD be in the kernel for minimum random latency .
* MUST be accurate to a few minutes .
* MUST be updated at least at 15 Hz .
*/
static void icmp_timestamp ( struct sk_buff * skb )
{
struct timeval tv ;
struct icmp_bxm icmp_param ;
/*
* Too short .
*/
if ( skb - > len < 4 )
goto out_err ;
/*
* Fill in the current time as ms since midnight UT :
*/
do_gettimeofday ( & tv ) ;
icmp_param . data . times [ 1 ] = htonl ( ( tv . tv_sec % 86400 ) * 1000 +
tv . tv_usec / 1000 ) ;
icmp_param . data . times [ 2 ] = icmp_param . data . times [ 1 ] ;
if ( skb_copy_bits ( skb , 0 , & icmp_param . data . times [ 0 ] , 4 ) )
BUG ( ) ;
2007-03-13 20:43:18 +03:00
icmp_param . data . icmph = * icmp_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
icmp_param . data . icmph . type = ICMP_TIMESTAMPREPLY ;
icmp_param . data . icmph . code = 0 ;
icmp_param . skb = skb ;
icmp_param . offset = 0 ;
icmp_param . data_len = 0 ;
icmp_param . head_len = sizeof ( struct icmphdr ) + 12 ;
icmp_reply ( & icmp_param , skb ) ;
out :
return ;
out_err :
ICMP_INC_STATS_BH ( ICMP_MIB_INERRORS ) ;
goto out ;
}
/*
* Handle ICMP_ADDRESS_MASK requests . ( RFC950 )
*
* RFC1122 ( 3.2 .2 .9 ) . A host MUST only send replies to
* ADDRESS_MASK requests if it ' s been configured as an address mask
* agent . Receiving a request doesn ' t constitute implicit permission to
* act as one . Of course , implementing this correctly requires ( SHOULD )
* a way to turn the functionality on and off . Another one for sysctl ( ) ,
* I guess . - - MS
*
* RFC1812 ( 4.3 .3 .9 ) . A router MUST implement it .
* A router SHOULD have switch turning it on / off .
* This switch MUST be ON by default .
*
* Gratuitous replies , zero - source replies are not implemented ,
* that complies with RFC . DO NOT implement them ! ! ! All the idea
* of broadcast addrmask replies as specified in RFC950 is broken .
* The problem is that it is not uncommon to have several prefixes
* on one physical interface . Moreover , addrmask agent can even be
* not aware of existing another prefixes .
* If source is zero , addrmask agent cannot choose correct prefix .
* Gratuitous mask announcements suffer from the same problem .
* RFC1812 explains it , but still allows to use ADDRMASK ,
* that is pretty silly . - - ANK
*
* All these rules are so bizarre , that I removed kernel addrmask
* support at all . It is wrong , it is obsolete , nobody uses it in
* any case . - - ANK
*
* Furthermore you can do it with a usermode address agent program
* anyway . . .
*/
/*
 * Handler for ICMP_ADDRESS requests.  Intentionally does nothing: the
 * only statement is compiled out with #if 0, so requests are ignored.
 */
static void icmp_address ( struct sk_buff * skb )
{
#if 0
if ( net_ratelimit ( ) )
printk ( KERN_DEBUG " a guy asks for address mask. Who is it? \n " ) ;
# endif
}
/*
* RFC1812 ( 4.3 .3 .9 ) . A router SHOULD listen all replies , and complain
* loudly if an inconsistency is found .
*/
static void icmp_address_reply ( struct sk_buff * skb )
{
struct rtable * rt = ( struct rtable * ) skb - > dst ;
struct net_device * dev = skb - > dev ;
struct in_device * in_dev ;
struct in_ifaddr * ifa ;
if ( skb - > len < 4 | | ! ( rt - > rt_flags & RTCF_DIRECTSRC ) )
goto out ;
in_dev = in_dev_get ( dev ) ;
if ( ! in_dev )
goto out ;
rcu_read_lock ( ) ;
if ( in_dev - > ifa_list & &
IN_DEV_LOG_MARTIANS ( in_dev ) & &
IN_DEV_FORWARD ( in_dev ) ) {
2006-09-29 05:00:55 +04:00
__be32 _mask , * mp ;
2005-04-17 02:20:36 +04:00
mp = skb_header_pointer ( skb , 0 , sizeof ( _mask ) , & _mask ) ;
2006-01-09 09:24:28 +03:00
BUG_ON ( mp = = NULL ) ;
2005-04-17 02:20:36 +04:00
for ( ifa = in_dev - > ifa_list ; ifa ; ifa = ifa - > ifa_next ) {
if ( * mp = = ifa - > ifa_mask & &
inet_ifa_match ( rt - > rt_src , ifa ) )
break ;
}
if ( ! ifa & & net_ratelimit ( ) ) {
printk ( KERN_INFO " Wrong address mask %u.%u.%u.%u from "
" %s/%u.%u.%u.%u \n " ,
NIPQUAD ( * mp ) , dev - > name , NIPQUAD ( rt - > rt_src ) ) ;
}
}
rcu_read_unlock ( ) ;
in_dev_put ( in_dev ) ;
out : ;
}
/*
 * No-op handler used in icmp_pointers[] for types that need no processing;
 * icmp_rcv() frees the skb after the handler returns.
 */
static void icmp_discard ( struct sk_buff * skb )
{
}
/*
* Deal with incoming ICMP packets .
*/
int icmp_rcv ( struct sk_buff * skb )
{
struct icmphdr * icmph ;
struct rtable * rt = ( struct rtable * ) skb - > dst ;
2007-12-13 05:54:16 +03:00
if ( ! xfrm4_policy_check ( NULL , XFRM_POLICY_IN , skb ) ) {
2007-12-12 21:44:43 +03:00
int nh ;
2007-12-13 05:54:16 +03:00
if ( ! ( skb - > sp & & skb - > sp - > xvec [ skb - > sp - > len - 1 ] - > props . flags &
XFRM_STATE_ICMP ) )
goto drop ;
2007-12-12 21:44:43 +03:00
if ( ! pskb_may_pull ( skb , sizeof ( * icmph ) + sizeof ( struct iphdr ) ) )
goto drop ;
nh = skb_network_offset ( skb ) ;
skb_set_network_header ( skb , sizeof ( * icmph ) ) ;
if ( ! xfrm4_policy_check_reverse ( NULL , XFRM_POLICY_IN , skb ) )
goto drop ;
skb_set_network_header ( skb , nh ) ;
}
2005-04-17 02:20:36 +04:00
ICMP_INC_STATS_BH ( ICMP_MIB_INMSGS ) ;
switch ( skb - > ip_summed ) {
2006-08-30 03:44:56 +04:00
case CHECKSUM_COMPLETE :
2006-11-15 08:24:49 +03:00
if ( ! csum_fold ( skb - > csum ) )
2005-04-17 02:20:36 +04:00
break ;
2005-11-11 00:01:24 +03:00
/* fall through */
2005-04-17 02:20:36 +04:00
case CHECKSUM_NONE :
2005-11-11 00:01:24 +03:00
skb - > csum = 0 ;
if ( __skb_checksum_complete ( skb ) )
2005-04-17 02:20:36 +04:00
goto error ;
}
2008-02-05 14:15:50 +03:00
if ( ! pskb_pull ( skb , sizeof ( * icmph ) ) )
goto error ;
2005-04-17 02:20:36 +04:00
2007-03-13 20:43:18 +03:00
icmph = icmp_hdr ( skb ) ;
2005-04-17 02:20:36 +04:00
2007-09-17 20:57:33 +04:00
ICMPMSGIN_INC_STATS_BH ( icmph - > type ) ;
2005-04-17 02:20:36 +04:00
/*
* 18 is the highest ' known ' ICMP type . Anything else is a mystery
*
* RFC 1122 : 3.2 .2 Unknown ICMP messages types MUST be silently
* discarded .
*/
if ( icmph - > type > NR_ICMP_TYPES )
goto error ;
/*
* Parse the ICMP message
*/
2007-02-09 17:24:47 +03:00
if ( rt - > rt_flags & ( RTCF_BROADCAST | RTCF_MULTICAST ) ) {
2005-04-17 02:20:36 +04:00
/*
* RFC 1122 : 3.2 .2 .6 An ICMP_ECHO to broadcast MAY be
* silently ignored ( we let user decide with a sysctl ) .
* RFC 1122 : 3.2 .2 .8 An ICMP_TIMESTAMP MAY be silently
* discarded if to broadcast / multicast .
*/
2005-07-09 04:34:46 +04:00
if ( ( icmph - > type = = ICMP_ECHO | |
icmph - > type = = ICMP_TIMESTAMP ) & &
2005-04-17 02:20:36 +04:00
sysctl_icmp_echo_ignore_broadcasts ) {
goto error ;
}
if ( icmph - > type ! = ICMP_ECHO & &
icmph - > type ! = ICMP_TIMESTAMP & &
icmph - > type ! = ICMP_ADDRESS & &
icmph - > type ! = ICMP_ADDRESSREPLY ) {
goto error ;
2007-02-09 17:24:47 +03:00
}
2005-04-17 02:20:36 +04:00
}
icmp_pointers [ icmph - > type ] . handler ( skb ) ;
drop :
kfree_skb ( skb ) ;
return 0 ;
error :
ICMP_INC_STATS_BH ( ICMP_MIB_INERRORS ) ;
goto drop ;
}
/*
* This table is the definition of how we handle ICMP .
*/
2005-11-30 03:21:38 +03:00
static const struct icmp_control icmp_pointers [ NR_ICMP_TYPES + 1 ] = {
2005-04-17 02:20:36 +04:00
[ ICMP_ECHOREPLY ] = {
. handler = icmp_discard ,
} ,
[ 1 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ 2 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ ICMP_DEST_UNREACH ] = {
. handler = icmp_unreach ,
. error = 1 ,
} ,
[ ICMP_SOURCE_QUENCH ] = {
. handler = icmp_unreach ,
. error = 1 ,
} ,
[ ICMP_REDIRECT ] = {
. handler = icmp_redirect ,
. error = 1 ,
} ,
[ 6 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ 7 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ ICMP_ECHO ] = {
. handler = icmp_echo ,
} ,
[ 9 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ 10 ] = {
. handler = icmp_discard ,
. error = 1 ,
} ,
[ ICMP_TIME_EXCEEDED ] = {
. handler = icmp_unreach ,
. error = 1 ,
} ,
[ ICMP_PARAMETERPROB ] = {
. handler = icmp_unreach ,
. error = 1 ,
} ,
[ ICMP_TIMESTAMP ] = {
. handler = icmp_timestamp ,
} ,
[ ICMP_TIMESTAMPREPLY ] = {
. handler = icmp_discard ,
} ,
[ ICMP_INFO_REQUEST ] = {
. handler = icmp_discard ,
} ,
2007-02-09 17:24:47 +03:00
[ ICMP_INFO_REPLY ] = {
2005-04-17 02:20:36 +04:00
. handler = icmp_discard ,
} ,
[ ICMP_ADDRESS ] = {
. handler = icmp_address ,
} ,
[ ICMP_ADDRESSREPLY ] = {
. handler = icmp_address_reply ,
} ,
} ;
2008-02-29 22:14:50 +03:00
static void __exit icmp_exit ( void )
2005-04-17 02:20:36 +04:00
{
int i ;
2006-04-11 09:52:50 +04:00
for_each_possible_cpu ( i ) {
2008-02-29 22:16:08 +03:00
struct sock * sk ;
2008-02-29 22:14:50 +03:00
2008-02-29 22:16:08 +03:00
sk = per_cpu ( __icmp_sk , i ) ;
if ( sk = = NULL )
2008-02-29 22:14:50 +03:00
continue ;
2008-02-29 22:16:08 +03:00
per_cpu ( __icmp_sk , i ) = NULL ;
sock_release ( sk - > sk_socket ) ;
2008-02-29 22:14:50 +03:00
}
}
2005-04-17 02:20:36 +04:00
2008-02-29 22:14:50 +03:00
/*
 * Create one raw ICMP socket per possible CPU and configure it for
 * kernel-internal transmission.
 *
 * Returns 0 on success.  If a socket cannot be created, the sockets
 * created so far are released and the error from sock_create_kern() is
 * returned.
 */
int __init icmp_init(void)
{
	int cpu, err;

	for_each_possible_cpu(cpu) {
		struct socket *sock;
		struct inet_sock *inet;
		struct sock *sk;

		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, &sock);
		if (err < 0) {
			icmp_exit();
			return err;
		}

		sk = sock->sk;
		per_cpu(__icmp_sk, cpu) = sk;
		sk->sk_allocation = GFP_ATOMIC;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff)));

		inet = inet_sk(sk);
		inet->uc_ttl = -1;
		inet->pmtudisc = IP_PMTUDISC_DONT;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		sk->sk_prot->unhash(sk);
	}

	return 0;
}
/* Symbols from this file made available through EXPORT_SYMBOL. */
EXPORT_SYMBOL ( icmp_err_convert ) ;
EXPORT_SYMBOL ( icmp_send ) ;
EXPORT_SYMBOL ( icmp_statistics ) ;
EXPORT_SYMBOL ( xrlim_allow ) ;