2011-11-28 09:22:18 +04:00
# include <linux/skbuff.h>
2012-01-25 01:03:33 +04:00
# include <linux/export.h>
2011-11-28 09:22:18 +04:00
# include <linux/ip.h>
# include <linux/ipv6.h>
# include <linux/if_vlan.h>
# include <net/ip.h>
2012-07-18 12:11:12 +04:00
# include <net/ipv6.h>
2013-03-19 10:39:30 +04:00
# include <linux/igmp.h>
# include <linux/icmp.h>
# include <linux/sctp.h>
# include <linux/dccp.h>
2011-11-28 09:22:18 +04:00
# include <linux/if_tunnel.h>
# include <linux/if_pppox.h>
# include <linux/ppp_defs.h>
2015-05-12 15:56:07 +03:00
# include <net/flow_dissector.h>
2014-09-06 03:20:26 +04:00
# include <scsi/fc/fc_fcoe.h>
2011-11-28 09:22:18 +04:00
flow_dissector: use a 64bit load/store
Le lundi 28 novembre 2011 à 19:06 -0500, David Miller a écrit :
> From: Dimitris Michailidis <dm@chelsio.com>
> Date: Mon, 28 Nov 2011 08:25:39 -0800
>
> >> +bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
> >> *flow)
> >> +{
> >> + int poff, nhoff = skb_network_offset(skb);
> >> + u8 ip_proto;
> >> + u16 proto = skb->protocol;
> >
> > __be16 instead of u16 for proto?
>
> I'll take care of this when I apply these patches.
( CC trimmed )
Thanks David !
Here is a small patch to use one 64bit load/store on x86_64 instead of
two 32bit load/stores.
[PATCH net-next] flow_dissector: use a 64bit load/store
gcc compiler is smart enough to use a single load/store if we
memcpy(dptr, sptr, 8) on x86_64, regardless of
CONFIG_CC_OPTIMIZE_FOR_SIZE
In the IP header, daddr immediately follows saddr; this won't change in the
future. We only need to make sure our flow_keys (src,dst) fields won't
break the rule.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-11-29 00:30:35 +04:00
/* copy saddr & daddr, possibly using 64bit load/store
* Equivalent to : flow - > src = iph - > saddr ;
* flow - > dst = iph - > daddr ;
*/
static void iph_to_flow_copy_addrs ( struct flow_keys * flow , const struct iphdr * iph )
{
BUILD_BUG_ON ( offsetof ( typeof ( * flow ) , dst ) ! =
offsetof ( typeof ( * flow ) , src ) + sizeof ( flow - > src ) ) ;
memcpy ( & flow - > src , & iph - > saddr , sizeof ( flow - > src ) + sizeof ( flow - > dst ) ) ;
}
2011-11-28 09:22:18 +04:00
2013-10-02 15:39:24 +04:00
/**
2014-08-26 04:03:46 +04:00
* __skb_flow_get_ports - extract the upper layer ports and return them
* @ skb : sk_buff to extract the ports from
2013-10-02 15:39:24 +04:00
* @ thoff : transport header offset
* @ ip_proto : protocol for which to get port offset
2014-08-26 04:03:46 +04:00
* @ data : raw buffer pointer to the packet , if NULL use skb - > data
* @ hlen : packet header length , if @ data is NULL use skb_headlen ( skb )
2013-10-02 15:39:24 +04:00
*
* The function will try to retrieve the ports at offset thoff + poff where poff
* is the protocol port offset returned from proto_ports_offset
*/
2014-08-23 23:13:41 +04:00
__be32 __skb_flow_get_ports ( const struct sk_buff * skb , int thoff , u8 ip_proto ,
void * data , int hlen )
2013-10-02 15:39:24 +04:00
{
int poff = proto_ports_offset ( ip_proto ) ;
2014-08-23 23:13:41 +04:00
if ( ! data ) {
data = skb - > data ;
hlen = skb_headlen ( skb ) ;
}
2013-10-02 15:39:24 +04:00
if ( poff > = 0 ) {
__be32 * ports , _ports ;
2014-08-23 23:13:41 +04:00
ports = __skb_header_pointer ( skb , thoff + poff ,
sizeof ( _ports ) , data , hlen , & _ports ) ;
2013-10-02 15:39:24 +04:00
if ( ports )
return * ports ;
}
return 0 ;
}
2014-08-23 23:13:41 +04:00
EXPORT_SYMBOL ( __skb_flow_get_ports ) ;
2013-10-02 15:39:24 +04:00
2014-08-26 04:03:47 +04:00
/**
 * __skb_flow_dissect - extract the flow_keys struct and return it
 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
 * @flow: flow_keys to fill in; it is zeroed before dissection starts
 * @data: raw buffer pointer to the packet, if NULL use skb->data
 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
 *
 * The function will try to retrieve the struct flow_keys from either the skbuff
 * or a raw buffer specified by the rest parameters.
 *
 * Returns true when the keys were filled in, false when a required header
 * could not be read, the IPv4 header length is invalid, or the protocol is
 * not dissected here.
 */
bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
			void *data, __be16 proto, int nhoff, int hlen)
{
	u8 ip_proto;

	/* Without a raw buffer every input is taken from the skb itself. */
	if (!data) {
		data = skb->data;
		proto = skb->protocol;
		nhoff = skb_network_offset(skb);
		hlen = skb_headlen(skb);
	}

	memset(flow, 0, sizeof(*flow));

again:
	switch (proto) {
	case htons(ETH_P_IP): {
		const struct iphdr *iph;
		struct iphdr _iph;
ip:
		iph = __skb_header_pointer(skb, nhoff, sizeof(_iph),
					   data, hlen, &_iph);
		if (!iph || iph->ihl < 5)
			return false;

		nhoff += iph->ihl * 4;

		/* Fragments are reported with protocol 0. */
		ip_proto = ip_is_fragment(iph) ? 0 : iph->protocol;

		/* Addresses are only recorded when there is an skb; the
		 * assumption is that without one the caller is after
		 * lengths and protocols rather than flow info.
		 */
		if (skb)
			iph_to_flow_copy_addrs(flow, iph);
		break;
	}
	case htons(ETH_P_IPV6): {
		const struct ipv6hdr *ip6h;
		struct ipv6hdr _ip6h;
		__be32 flow_label;
ipv6:
		ip6h = __skb_header_pointer(skb, nhoff, sizeof(_ip6h),
					    data, hlen, &_ip6h);
		if (!ip6h)
			return false;

		ip_proto = ip6h->nexthdr;
		nhoff += sizeof(struct ipv6hdr);

		/* Same rule as for IPv4: no skb, no address processing. */
		if (!skb)
			break;

		flow->src = (__force __be32)ipv6_addr_hash(&ip6h->saddr);
		flow->dst = (__force __be32)ipv6_addr_hash(&ip6h->daddr);

		flow_label = ip6_flowlabel(ip6h);
		if (!flow_label)
			break;

		/* A non-zero flow label stands in for the ports, so the
		 * dissection can stop right here.
		 */
		flow->n_proto = proto;
		flow->ip_proto = ip_proto;
		flow->ports = flow_label;
		flow->thoff = (u16)nhoff;

		return true;
	}
	case htons(ETH_P_8021AD):
	case htons(ETH_P_8021Q): {
		const struct vlan_hdr *vlan;
		struct vlan_hdr _vlan;

		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
					    data, hlen, &_vlan);
		if (!vlan)
			return false;

		/* Step over the VLAN tag and dissect the encapsulated protocol. */
		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	}
	case htons(ETH_P_PPP_SES): {
		struct {
			struct pppoe_hdr hdr;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
					   data, hlen, &_hdr);
		if (!hdr)
			return false;

		proto = hdr->proto;
		nhoff += PPPOE_SES_HLEN;

		/* Only IPv4 and IPv6 payloads are followed inside PPPoE. */
		if (proto == htons(PPP_IP))
			goto ip;
		if (proto == htons(PPP_IPV6))
			goto ipv6;
		return false;
	}
	case htons(ETH_P_TIPC): {
		struct {
			__be32 pre[3];
			__be32 srcnode;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
					   data, hlen, &_hdr);
		if (!hdr)
			return false;

		/* TIPC flows are keyed on the source node only. */
		flow->src = hdr->srcnode;
		flow->dst = 0;
		flow->n_proto = proto;
		flow->thoff = (u16)nhoff;
		return true;
	}
	case htons(ETH_P_FCOE):
		/* thoff is recorded, but the dissection still reports failure. */
		flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
		return false;
	default:
		return false;
	}

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		} *hdr, _hdr;

		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
					   data, hlen, &_hdr);
		if (!hdr)
			return false;

		/* Only look inside GRE if version zero and no routing. */
		if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
			break;

		proto = hdr->proto;

		/* Four bytes of base header, plus four more for each of the
		 * checksum, key and sequence flags that is set.
		 */
		nhoff += 4;
		if (hdr->flags & GRE_CSUM)
			nhoff += 4;
		if (hdr->flags & GRE_KEY)
			nhoff += 4;
		if (hdr->flags & GRE_SEQ)
			nhoff += 4;

		if (proto == htons(ETH_P_TEB)) {
			const struct ethhdr *eth;
			struct ethhdr _eth;

			/* An Ethernet header follows: skip it and continue
			 * with the protocol it carries.
			 */
			eth = __skb_header_pointer(skb, nhoff, sizeof(_eth),
						   data, hlen, &_eth);
			if (!eth)
				return false;

			proto = eth->h_proto;
			nhoff += sizeof(*eth);
		}
		goto again;
	}
	case IPPROTO_IPIP:
		proto = htons(ETH_P_IP);
		goto ip;
	case IPPROTO_IPV6:
		proto = htons(ETH_P_IPV6);
		goto ipv6;
	default:
		break;
	}

	flow->n_proto = proto;
	flow->ip_proto = ip_proto;
	flow->thoff = (u16)nhoff;

	/* Port info is only recorded when an skb was supplied. */
	if (skb)
		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
						   data, hlen);

	return true;
}
EXPORT_SYMBOL(__skb_flow_dissect);
2013-01-21 04:39:24 +04:00
static u32 hashrnd __read_mostly ;
2013-10-23 22:06:00 +04:00
static __always_inline void __flow_hash_secret_init ( void )
{
net_get_random_once ( & hashrnd , sizeof ( hashrnd ) ) ;
}
2015-05-01 21:30:12 +03:00
/* Hash three 32-bit words with jhash_3words(), using @keyval as the
 * initial value.
 */
static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
{
	return jhash_3words(a, b, c, keyval);
}
2015-05-01 21:30:12 +03:00
/* Compute a direction-independent hash over @keys, keyed with @keyval.
 *
 * Note that @keys may be modified: addresses and ports are swapped when
 * needed so that both directions of a flow hash to the same value.
 * A computed hash of 0 is remapped to 1, so the result is never zero.
 */
static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
	const u32 src = (__force u32)keys->src;
	const u32 dst = (__force u32)keys->dst;
	bool reversed;
	u32 hash;

	/* Canonical order: dst >= src, with ports breaking an address tie. */
	reversed = dst < src ||
		   (dst == src &&
		    (__force u16)keys->port16[1] < (__force u16)keys->port16[0]);

	if (reversed) {
		swap(keys->dst, keys->src);
		swap(keys->port16[0], keys->port16[1]);
	}

	hash = __flow_hash_3words((__force u32)keys->dst,
				  (__force u32)keys->src,
				  (__force u32)keys->ports,
				  keyval);

	return hash ? hash : 1;
}
/* Hash @keys with the file-wide random key, initialising that key first.
 * @keys may be reordered by __flow_hash_from_keys().
 */
u32 flow_hash_from_keys(struct flow_keys *keys)
{
	__flow_hash_secret_init();

	return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);
2015-05-01 21:30:12 +03:00
/* Dissect @skb into @keys and hash the result with @keyval.
 * Returns 0 when the dissection fails.
 */
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
				  struct flow_keys *keys, u32 keyval)
{
	return skb_flow_dissect(skb, keys) ?
	       __flow_hash_from_keys(keys, keyval) : 0;
}
2015-05-01 21:30:17 +03:00
struct _flow_keys_digest_data {
__be16 n_proto ;
u8 ip_proto ;
u8 padding ;
__be32 ports ;
__be32 src ;
__be32 dst ;
} ;
void make_flow_keys_digest ( struct flow_keys_digest * digest ,
const struct flow_keys * flow )
{
struct _flow_keys_digest_data * data =
( struct _flow_keys_digest_data * ) digest ;
BUILD_BUG_ON ( sizeof ( * data ) > sizeof ( * digest ) ) ;
memset ( digest , 0 , sizeof ( * digest ) ) ;
data - > n_proto = flow - > n_proto ;
data - > ip_proto = flow - > ip_proto ;
data - > ports = flow - > ports ;
data - > src = flow - > src ;
data - > dst = flow - > dst ;
}
EXPORT_SYMBOL ( make_flow_keys_digest ) ;
2015-05-12 15:56:10 +03:00
/**
* __skb_get_hash : calculate a flow hash
* @ skb : sk_buff to calculate flow hash from
*
* This function calculates a flow hash based on src / dst addresses
2014-03-25 02:34:47 +04:00
* and src / dst port numbers . Sets hash in skb to non - zero hash value
* on success , zero indicates no valid hash . Also , sets l4_hash in skb
2013-01-21 04:39:24 +04:00
* if hash is a canonical 4 - tuple hash over transport ports .
*/
2013-12-16 10:12:06 +04:00
void __skb_get_hash ( struct sk_buff * skb )
2013-01-21 04:39:24 +04:00
{
struct flow_keys keys ;
2015-05-01 21:30:12 +03:00
u32 hash ;
2013-01-21 04:39:24 +04:00
2015-05-01 21:30:12 +03:00
__flow_hash_secret_init ( ) ;
hash = ___skb_get_hash ( skb , & keys , hashrnd ) ;
if ( ! hash )
2013-01-21 04:39:24 +04:00
return ;
if ( keys . ports )
2014-03-25 02:34:47 +04:00
skb - > l4_hash = 1 ;
2014-07-02 08:33:17 +04:00
skb - > sw_hash = 1 ;
2015-05-01 21:30:12 +03:00
skb - > hash = hash ;
2013-01-21 04:39:24 +04:00
}
2013-12-16 10:12:06 +04:00
EXPORT_SYMBOL ( __skb_get_hash ) ;
2013-01-21 04:39:24 +04:00
2015-05-01 21:30:12 +03:00
/* Compute a flow hash for @skb keyed with the caller-supplied @perturb
 * value.  Returns 0 when the packet cannot be dissected; @skb itself is
 * not modified.
 */
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
	struct flow_keys flow;

	return ___skb_get_hash(skb, &flow, perturb);
}
EXPORT_SYMBOL(skb_get_hash_perturb);
2014-09-06 03:20:26 +04:00
u32 __skb_get_poff ( const struct sk_buff * skb , void * data ,
const struct flow_keys * keys , int hlen )
2013-03-19 10:39:30 +04:00
{
2014-09-06 03:20:26 +04:00
u32 poff = keys - > thoff ;
2013-03-19 10:39:30 +04:00
2014-09-06 03:20:26 +04:00
switch ( keys - > ip_proto ) {
2013-03-19 10:39:30 +04:00
case IPPROTO_TCP : {
2014-10-10 23:09:12 +04:00
/* access doff as u8 to avoid unaligned access */
const u8 * doff ;
u8 _doff ;
2013-03-19 10:39:30 +04:00
2014-10-10 23:09:12 +04:00
doff = __skb_header_pointer ( skb , poff + 12 , sizeof ( _doff ) ,
data , hlen , & _doff ) ;
if ( ! doff )
2013-03-19 10:39:30 +04:00
return poff ;
2014-10-10 23:09:12 +04:00
poff + = max_t ( u32 , sizeof ( struct tcphdr ) , ( * doff & 0xF0 ) > > 2 ) ;
2013-03-19 10:39:30 +04:00
break ;
}
case IPPROTO_UDP :
case IPPROTO_UDPLITE :
poff + = sizeof ( struct udphdr ) ;
break ;
/* For the rest, we do not really care about header
* extensions at this point for now .
*/
case IPPROTO_ICMP :
poff + = sizeof ( struct icmphdr ) ;
break ;
case IPPROTO_ICMPV6 :
poff + = sizeof ( struct icmp6hdr ) ;
break ;
case IPPROTO_IGMP :
poff + = sizeof ( struct igmphdr ) ;
break ;
case IPPROTO_DCCP :
poff + = sizeof ( struct dccp_hdr ) ;
break ;
case IPPROTO_SCTP :
poff + = sizeof ( struct sctphdr ) ;
break ;
}
return poff ;
}
2014-09-06 03:20:26 +04:00
/* skb_get_poff() returns the offset to the payload as far as it could
* be dissected . The main user is currently BPF , so that we can dynamically
* truncate packets without needing to push actual payload to the user
* space and can analyze headers only , instead .
*/
u32 skb_get_poff ( const struct sk_buff * skb )
{
struct flow_keys keys ;
if ( ! skb_flow_dissect ( skb , & keys ) )
return 0 ;
return __skb_get_poff ( skb , skb - > data , & keys , skb_headlen ( skb ) ) ;
}