/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the IP module.
 *
 * Version:	@(#)ip.h	1.0.2	05/07/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Changes:
 *		Mike McLagan	:	Routing by source
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#ifndef _IP_H
#define _IP_H

#include <linux/types.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/skbuff.h>

#include <net/inet_sock.h>
#include <net/snmp.h>
#include <net/flow.h>

struct sock;
struct inet_skb_parm {
	struct ip_options	opt;		/* Compiled IP options		*/
	unsigned char		flags;

#define IPSKB_FORWARDED		1
#define IPSKB_XFRM_TUNNEL_SIZE	2
#define IPSKB_XFRM_TRANSFORMED	4
#define IPSKB_FRAG_COMPLETE	8
#define IPSKB_REROUTED		16
};
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
{
	return ip_hdr(skb)->ihl * 4;
}
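
/*
 * Illustrative sketch, not part of the original header: ip_hdrlen() is the
 * usual way to find where the transport header of a received packet begins.
 * The helper name ip_example_transport_ptr() is made up for this example.
 */
static inline unsigned char *ip_example_transport_ptr(const struct sk_buff *skb)
{
	/* The L4 header starts ihl * 4 bytes past the network header. */
	return skb_network_header(skb) + ip_hdrlen(skb);
}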
struct ipcm_cookie {
	__be32			addr;
	int			oif;
	struct ip_options_rcu	*opt;
	__u8			tx_flags;
};

#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
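
/*
 * Minimal sketch, not part of the original header: IPCB() exposes the
 * per-packet IP state kept in skb->cb, e.g. the IPSKB_* flags defined above.
 * ip_example_was_forwarded() is a hypothetical helper.
 */
static inline bool ip_example_was_forwarded(struct sk_buff *skb)
{
	/* IPSKB_FORWARDED is set on packets that took the forwarding path. */
	return (IPCB(skb)->flags & IPSKB_FORWARDED) != 0;
}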
struct ip_ra_chain {
	struct ip_ra_chain __rcu *next;
	struct sock		*sk;
	union {
		void			(*destructor)(struct sock *);
		struct sock		*saved_sk;
	};
	struct rcu_head		rcu;
};

extern struct ip_ra_chain __rcu *ip_ra_chain;
/* IP flags. */
#define IP_CE		0x8000		/* Flag: "Congestion"		*/
#define IP_DF		0x4000		/* Flag: "Don't Fragment"	*/
#define IP_MF		0x2000		/* Flag: "More Fragments"	*/
#define IP_OFFSET	0x1FFF		/* "Fragment Offset" part	*/

#define IP_FRAG_TIME	(30 * HZ)	/* fragment lifetime		*/
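
/*
 * Worked example, not part of the original header: frag_off carries the
 * flags above plus a 13-bit offset counted in 8-octet units, so the byte
 * offset of a fragment is recovered as below. The helper name is illustrative.
 */
static inline unsigned int ip_example_frag_byte_offset(const struct iphdr *iph)
{
	return (ntohs(iph->frag_off) & IP_OFFSET) * 8;
}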
struct msghdr;
struct net_device;
struct packet_type;
struct rtable;
struct sockaddr;

extern int igmp_mc_proc_init(void);

/*
 *	Functions provided by ip.c
 */
extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
				 __be32 saddr, __be32 daddr,
				 struct ip_options_rcu *opt);
extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
		  struct packet_type *pt, struct net_device *orig_dev);
extern int ip_local_deliver(struct sk_buff *skb);
extern int ip_mr_input(struct sk_buff *skb);
extern int ip_output(struct sk_buff *skb);
extern int ip_mc_output(struct sk_buff *skb);
extern int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
extern int ip_do_nat(struct sk_buff *skb);
extern void ip_send_check(struct iphdr *ip);
extern int __ip_local_out(struct sk_buff *skb);
extern int ip_local_out(struct sk_buff *skb);
extern int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl);
extern void ip_init(void);
extern int ip_append_data(struct sock *sk, struct flowi4 *fl4,
			  int getfrag(void *from, char *to, int offset, int len,
				      int odd, struct sk_buff *skb),
			  void *from, int len, int protolen,
			  struct ipcm_cookie *ipc,
			  struct rtable **rt,
			  unsigned int flags);
extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
extern ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
			      int offset, size_t size, int flags);
extern struct sk_buff *__ip_make_skb(struct sock *sk,
				     struct flowi4 *fl4,
				     struct sk_buff_head *queue,
				     struct inet_cork *cork);
extern int ip_send_skb(struct sk_buff *skb);
extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4);
extern void ip_flush_pending_frames(struct sock *sk);
extern struct sk_buff *ip_make_skb(struct sock *sk,
				   struct flowi4 *fl4,
				   int getfrag(void *from, char *to, int offset, int len,
					       int odd, struct sk_buff *skb),
				   void *from, int length, int transhdrlen,
				   struct ipcm_cookie *ipc,
				   struct rtable **rtp,
				   unsigned int flags);

static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
{
	return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
}
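
/*
 * Usage sketch, not part of the original header, loosely modelled on how a
 * datagram protocol is expected to drive the corking API declared above:
 * queue the user payload with ip_append_data(), then either flush it with
 * ip_push_pending_frames() or discard the queue on error. The function name
 * and the MSG_DONTWAIT choice are illustrative assumptions.
 */
static inline int ip_example_send_dgram(struct sock *sk, struct flowi4 *fl4,
					struct msghdr *msg, int len,
					struct ipcm_cookie *ipc,
					struct rtable **rtp)
{
	int err;

	lock_sock(sk);
	err = ip_append_data(sk, fl4, ip_generic_getfrag, msg->msg_iov,
			     len, 0, ipc, rtp, MSG_DONTWAIT);
	if (err)
		ip_flush_pending_frames(sk);		/* drop the partial queue */
	else
		err = ip_push_pending_frames(sk, fl4);	/* build and transmit */
	release_sock(sk);
	return err;
}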

/* datagram.c */
extern int ip4_datagram_connect(struct sock *sk,
				struct sockaddr *uaddr, int addr_len);

struct ip_reply_arg {
	struct kvec iov[1];
	int	    flags;
	__wsum	    csum;
	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
				/* -1 if not needed */
	int	    bound_dev_if;
	u8	    tos;
};

#define IP_REPLY_ARG_NOSRCCHECK 1

static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
{
	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
}

void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
		   const struct ip_reply_arg *arg, unsigned int len);

struct ipv4_config {
	int	log_martians;
	int	no_pmtu_disc;
};

extern struct ipv4_config ipv4_config;

#define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
#define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
#define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
#define NET_INC_STATS_USER(net, field)	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
#define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
#define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)

extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
#if BITS_PER_LONG==32
extern u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
#else
static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
{
	return snmp_fold_field(mib, offt);
}
#endif
extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
extern void snmp_mib_free(void __percpu *ptr[2]);

extern struct local_ports {
	seqlock_t	lock;
	int		range[2];
} sysctl_local_ports;
extern void inet_get_local_port_range(int *low, int *high);

extern unsigned long *sysctl_local_reserved_ports;
static inline int inet_is_reserved_local_port(int port)
{
	return test_bit(port, sysctl_local_reserved_ports);
}
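
/*
 * Minimal sketch, not part of the original header: an ephemeral-port
 * allocator is expected to combine the two helpers above, walking the
 * configured local range and skipping administratively reserved ports.
 * try_port() stands in for the real bind/hash conflict check.
 */
static inline int ip_example_pick_local_port(int (*try_port)(int port))
{
	int low, high, port;

	inet_get_local_port_range(&low, &high);
	for (port = low; port <= high; port++) {
		if (inet_is_reserved_local_port(port))
			continue;
		if (try_port(port))
			return port;
	}
	return -1;	/* range exhausted */
}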

extern int sysctl_ip_nonlocal_bind;

/* From inetpeer.c */
extern int inet_peer_threshold;
extern int inet_peer_minttl;
extern int inet_peer_maxttl;

/* From ip_output.c */
extern int sysctl_ip_dynaddr;

extern void ipfrag_init(void);

extern void ip_static_sysctl_init(void);

static inline bool ip_is_fragment(const struct iphdr *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
}

#ifdef CONFIG_INET

#include <net/dst.h>

/* The function in 2.2 was invalid, producing wrong result for
 * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
static inline
int ip_decrease_ttl(struct iphdr *iph)
{
	u32 check = (__force u32)iph->check;
	check += (__force u32)htons(0x0100);
	iph->check = (__force __sum16)(check + (check >= 0xFFFF));
	return --iph->ttl;
}

static inline
int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
{
	return inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO ||
		(inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT &&
		 !(dst_metric_locked(dst, RTAX_MTU)));
}

extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);

static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, struct sock *sk)
{
	if (iph->frag_off & htons(IP_DF)) {
		/* This is only to work around buggy Windows95/2000
		 * VJ compression implementations.  If the ID field
		 * does not change, they drop every other packet in
		 * a TCP stream using header compression.
		 */
		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
					htons(inet_sk(sk)->inet_id++) : 0;
	} else
		__ip_select_ident(iph, dst, 0);
}

static inline void ip_select_ident_more(struct iphdr *iph, struct dst_entry *dst, struct sock *sk, int more)
{
	if (iph->frag_off & htons(IP_DF)) {
		if (sk && inet_sk(sk)->inet_daddr) {
			iph->id = htons(inet_sk(sk)->inet_id);
			inet_sk(sk)->inet_id += 1 + more;
		} else
			iph->id = 0;
	} else
		__ip_select_ident(iph, dst, more);
}

/*
 *	Map a multicast IP onto multicast MAC for type ethernet.
 */

static inline void ip_eth_mc_map(__be32 naddr, char *buf)
{
	__u32 addr = ntohl(naddr);

	buf[0] = 0x01;
	buf[1] = 0x00;
	buf[2] = 0x5e;
	buf[5] = addr & 0xFF;
	addr >>= 8;
	buf[4] = addr & 0xFF;
	addr >>= 8;
	buf[3] = addr & 0x7F;
}
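
/*
 * Worked example (illustrative, not from the original source): the group
 * 224.1.2.3 maps to 01:00:5e:01:02:03. Only the low 23 bits of the address
 * are copied into the MAC, so 32 distinct groups share each mapping.
 */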

/*
 *	Map a multicast IP onto multicast MAC for type IP-over-InfiniBand.
 *	Leave P_Key as 0 to be filled in by driver.
 */

static inline void ip_ib_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf)
{
	__u32 addr;
	unsigned char scope = broadcast[5] & 0xF;

	buf[0]  = 0;		/* Reserved */
	buf[1]  = 0xff;		/* Multicast QPN */
	buf[2]  = 0xff;
	buf[3]  = 0xff;
	addr    = ntohl(naddr);
	buf[4]  = 0xff;
	buf[5]  = 0x10 | scope;	/* scope from broadcast address */
	buf[6]  = 0x40;		/* IPv4 signature */
	buf[7]  = 0x1b;
	buf[8]  = broadcast[8];	/* P_Key */
	buf[9]  = broadcast[9];
	buf[10] = 0;
	buf[11] = 0;
	buf[12] = 0;
	buf[13] = 0;
	buf[14] = 0;
	buf[15] = 0;
	buf[19] = addr & 0xff;
	addr  >>= 8;
	buf[18] = addr & 0xff;
	addr  >>= 8;
	buf[17] = addr & 0xff;
	addr  >>= 8;
	buf[16] = addr & 0x0f;
}

static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, char *buf)
{
	if ((broadcast[0] | broadcast[1] | broadcast[2] | broadcast[3]) != 0)
		memcpy(buf, broadcast, 4);
	else
		memcpy(buf, &naddr, sizeof(naddr));
}

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/ipv6.h>
#endif

static __inline__ void inet_reset_saddr(struct sock *sk)
{
	inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0;
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == PF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);

		memset(&np->saddr, 0, sizeof(np->saddr));
		memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr));
	}
#endif
}

#endif

static inline int sk_mc_loop(struct sock *sk)
{
	if (!sk)
		return 1;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return 1;
}

extern bool ip_call_ra_chain(struct sk_buff *skb);

/*
 *	Functions provided by ip_fragment.c
 */

enum ip_defrag_users {
	IP_DEFRAG_LOCAL_DELIVER,
	IP_DEFRAG_CALL_RA_CHAIN,
	IP_DEFRAG_CONNTRACK_IN,
	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHRT_MAX,
	IP_DEFRAG_CONNTRACK_OUT,
	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
	IP_DEFRAG_CONNTRACK_BRIDGE_IN,
	__IP_DEFRAG_CONNTRACK_BRIDGE_IN	= IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
	IP_DEFRAG_VS_IN,
	IP_DEFRAG_VS_OUT,
	IP_DEFRAG_VS_FWD,
	IP_DEFRAG_AF_PACKET,
	IP_DEFRAG_MACVLAN,
};

int ip_defrag(struct sk_buff *skb, u32 user);
#ifdef CONFIG_INET
struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user);
#else
static inline struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
{
	return skb;
}
#endif
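
/*
 * Usage sketch, not part of the original header: a tap-style receiver that
 * wants whole datagrams hands candidate fragments to ip_check_defrag() and
 * stops processing when the skb was absorbed into the reassembly queue
 * (NULL return). The helper name and the defrag user id are illustrative.
 */
static inline struct sk_buff *ip_example_maybe_defrag(struct sk_buff *skb)
{
	skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
	if (!skb)
		return NULL;	/* queued for reassembly, nothing to deliver yet */
	return skb;		/* complete datagram, or not a fragment at all */
}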

int ip_frag_mem(struct net *net);
int ip_frag_nqueues(struct net *net);

/*
 *	Functions provided by ip_forward.c
 */

extern int ip_forward(struct sk_buff *skb);

/*
 *	Functions provided by ip_options.c
 */

extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
			     __be32 daddr, struct rtable *rt, int is_frag);
extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
extern void ip_options_fragment(struct sk_buff *skb);
extern int ip_options_compile(struct net *net,
			      struct ip_options *opt, struct sk_buff *skb);
extern int ip_options_get(struct net *net, struct ip_options_rcu **optp,
			  unsigned char *data, int optlen);
extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
				    unsigned char __user *data, int optlen);
extern void ip_options_undo(struct ip_options *opt);
extern void ip_forward_options(struct sk_buff *skb);
extern int ip_options_rcv_srr(struct sk_buff *skb);

/*
 *	Functions provided by ip_sockglue.c
 */
extern void ipv4_pktinfo_prepare(struct sk_buff *skb);
extern void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
extern int ip_cmsg_send(struct net *net,
			struct msghdr *msg, struct ipcm_cookie *ipc);
extern int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen);
extern int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen);
extern int compat_ip_setsockopt(struct sock *sk, int level,
				int optname, char __user *optval, unsigned int optlen);
extern int compat_ip_getsockopt(struct sock *sk, int level,
				int optname, char __user *optval, int __user *optlen);
extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *));

extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len);
extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
			  __be16 port, u32 info, u8 *payload);
extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
			   u32 info);

#ifdef CONFIG_PROC_FS
extern int ip_misc_proc_init(void);
#endif

#endif	/* _IP_H */