/*
 * Stateless NAT actions
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */
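
/*
 * Illustrative iproute2 usage (a sketch, not part of this file; device and
 * qdisc handle are placeholders):
 *
 *	tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
 *		match ip src 10.0.0.1/32 \
 *		action nat egress 10.0.0.1 192.0.2.1
 *
 * "nat egress OLD NEW" rewrites the source address; "nat ingress OLD NEW"
 * rewrites the destination address, matching the egress flag handling in
 * tcf_nat() below.
 */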
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
#include <net/tc_act/tc_nat.h>
#include <net/tcp.h>
#include <net/udp.h>

/*
 * Why nat_net_id below is unsigned, from commit "netns: make struct
 * pernet_operations::id unsigned int":
 *
 * Make struct pernet_operations::id unsigned. There are two reasons to do so:
 *
 * 1) This field is really an index into a zero-based array and is thus an
 *    unsigned entity. Using a negative value is an out-of-bounds access by
 *    definition.
 *
 * 2) On x86_64, unsigned 32-bit data that is mixed with pointers via array
 *    indexing, or added to or subtracted from pointers, is preferred over
 *    signed 32-bit data. An "int" used as an array index needs to be
 *    sign-extended to 64 bits before being used:
 *
 *	void f(long *p, int i)
 *	{
 *		g(p[i]);
 *	}
 *
 *    roughly translates to
 *
 *	movsx	rsi, esi
 *	mov	rdi, [rsi+...]
 *	call	g
 *
 *    MOVSX is a 3-byte instruction which isn't necessary if the variable is
 *    unsigned, because x86_64 zero-extends by default. net_generic(), which
 *    uses "int" as an array index, is used a lot, so those sign extensions
 *    add up:
 *
 *	static inline void *net_generic(const struct net *net, int id)
 *	{
 *		...
 *		ptr = ng->ptr[id - 1];
 *		...
 *	}
 *
 * The patch shaves ~1730 bytes off an allyesconfig kernel (without all the
 * junk messing with code generation):
 *
 *	add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
 *
 * Unfortunately some functions actually grow bigger. This is a seemingly
 * random artefact of code generation, with the register allocator being used
 * differently: gcc decides that some variable needs to live in the new r8+
 * registers, so every access now requires a REX prefix, or it is shifted
 * into r12, so the [r12+0] addressing mode has to be used, which is longer
 * than [r8]. The overall balance is still in the negative direction:
 *
 *	add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
 *	function                                     old     new   delta
 *	nfsd4_lock                                  3886    3959     +73
 *	tipc_link_build_proto_msg                   1096    1140     +44
 *	mac80211_hwsim_new_radio                    2776    2808     +32
 *	tipc_mon_rcv                                1032    1058     +26
 *	svcauth_gss_legacy_init                     1413    1429     +16
 *	tipc_bcbase_select_primary                   379     392     +13
 *	nfsd4_exchange_id                           1247    1260     +13
 *	nfsd4_setclientid_confirm                    782     793     +11
 *	...
 *	put_client_renew_locked                      494     480     -14
 *	ip_set_sockfn_get                            730     716     -14
 *	geneve_sock_add                              829     813     -16
 *	nfsd4_sequence_done                          721     703     -18
 *	nlmclnt_lookup_host                          708     686     -22
 *	nfsd4_lockt                                 1085    1063     -22
 *	nfs_get_client                              1077    1050     -27
 *	tcf_bpf_init                                1106    1076     -30
 *	nfsd4_encode_fattr                          5997    5930     -67
 *	Total: Before=154856051, After=154854321, chg -0.00%
 *
 * Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
 * Signed-off-by: David S. Miller <davem@davemloft.net>
 */

static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;

static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
};
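
/*
 * Netlink control path: parse TCA_NAT_PARMS (struct tc_nat) and either
 * create a new action instance in the per-netns action table or update an
 * existing one in place.
 */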
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
			struct tc_action **a, int ovr, int bind,
			struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);
	struct nlattr *tb[TCA_NAT_MAX + 1];
	struct tc_nat *parm;
	int ret = 0, err;
	struct tcf_nat *p;

	if (nla == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_NAT_PARMS] == NULL)
		return -EINVAL;
	parm = nla_data(tb[TCA_NAT_PARMS]);

	if (!tcf_idr_check(tn, parm->index, a, bind)) {
		ret = tcf_idr_create(tn, parm->index, est, a,
				     &act_nat_ops, bind, false);
		if (ret)
			return ret;
		ret = ACT_P_CREATED;
	} else {
		if (bind)
			return 0;

		tcf_idr_release(*a, bind);
		if (!ovr)
			return -EEXIST;
	}
	p = to_tcf_nat(*a);

	spin_lock_bh(&p->tcf_lock);
	p->old_addr = parm->old_addr;
	p->new_addr = parm->new_addr;
	p->mask = parm->mask;
	p->flags = parm->flags;

	p->tcf_action = parm->action;
	spin_unlock_bh(&p->tcf_lock);

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);

	return ret;
}
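
/*
 * Data path: for matching IPv4 packets, rewrite the source address on
 * egress or the destination address on ingress (under the configured mask),
 * then fix up the IP header checksum and the TCP/UDP checksums. For ICMP
 * errors, the address inside the embedded IP header is rewritten as well.
 */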
static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
		   struct tcf_result *res)
{
	struct tcf_nat *p = to_tcf_nat(a);
	struct iphdr *iph;
	__be32 old_addr;
	__be32 new_addr;
	__be32 mask;
	__be32 addr;
	int egress;
	int action;
	int ihl;
	int noff;

	spin_lock(&p->tcf_lock);

	tcf_lastuse_update(&p->tcf_tm);
	old_addr = p->old_addr;
	new_addr = p->new_addr;
	mask = p->mask;
	egress = p->flags & TCA_NAT_FLAG_EGRESS;
	action = p->tcf_action;

	bstats_update(&p->tcf_bstats, skb);

	spin_unlock(&p->tcf_lock);

	if (unlikely(action == TC_ACT_SHOT))
		goto drop;

	noff = skb_network_offset(skb);
	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
		goto drop;

	iph = ip_hdr(skb);

	if (egress)
		addr = iph->saddr;
	else
		addr = iph->daddr;

	if (!((old_addr ^ addr) & mask)) {
		if (skb_try_make_writable(skb, sizeof(*iph) + noff))
			goto drop;

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* Rewrite IP header */
		iph = ip_hdr(skb);
		if (egress)
			iph->saddr = new_addr;
		else
			iph->daddr = new_addr;

		csum_replace4(&iph->check, addr, new_addr);
	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
		   iph->protocol != IPPROTO_ICMP) {
		goto out;
	}

	ihl = iph->ihl * 4;

	/* It would be nice to share code with stateful NAT. */
	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
	case IPPROTO_TCP:
	{
		struct tcphdr *tcph;

		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
		    skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
			goto drop;

		tcph = (void *)(skb_network_header(skb) + ihl);
		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
					 true);
		break;
	}
	case IPPROTO_UDP:
	{
		struct udphdr *udph;

		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
		    skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
			goto drop;

		udph = (void *)(skb_network_header(skb) + ihl);
		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
			inet_proto_csum_replace4(&udph->check, skb, addr,
						 new_addr, true);
			if (!udph->check)
				udph->check = CSUM_MANGLED_0;
		}
		break;
	}
	case IPPROTO_ICMP:
	{
		struct icmphdr *icmph;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);

		if ((icmph->type != ICMP_DEST_UNREACH) &&
		    (icmph->type != ICMP_TIME_EXCEEDED) &&
		    (icmph->type != ICMP_PARAMETERPROB))
			break;

		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
				   noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);
		if (egress)
			addr = iph->daddr;
		else
			addr = iph->saddr;

		if ((old_addr ^ addr) & mask)
			break;

		if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
					  sizeof(*iph) + noff))
			goto drop;

		icmph = (void *)(skb_network_header(skb) + ihl);
		iph = (void *)(icmph + 1);

		new_addr &= mask;
		new_addr |= addr & ~mask;

		/* XXX Fix up the inner checksums. */
		if (egress)
			iph->daddr = new_addr;
		else
			iph->saddr = new_addr;

		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
					 false);
		break;
	}
	default:
		break;
	}

out:
	return action;

drop:
	spin_lock(&p->tcf_lock);
	p->tcf_qstats.drops++;
	spin_unlock(&p->tcf_lock);
	return TC_ACT_SHOT;
}
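
/*
 * Dump the current parameters (struct tc_nat) and usage timestamps for this
 * action instance back to userspace over netlink.
 */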
static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
			int bind, int ref)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tcf_nat *p = to_tcf_nat(a);
	struct tc_nat opt = {
		.old_addr = p->old_addr,
		.new_addr = p->new_addr,
		.mask = p->mask,
		.flags = p->flags,

		.index = p->tcf_index,
		.action = p->tcf_action,
		.refcnt = p->tcf_refcnt - ref,
		.bindcnt = p->tcf_bindcnt - bind,
	};
	struct tcf_t t;

	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	tcf_tm_dump(&t, &p->tcf_tm);
	if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
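
/*
 * Per-netns plumbing: iterate over or look up NAT action instances in this
 * namespace's action table on behalf of the generic action code.
 */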
static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
			  struct netlink_callback *cb, int type,
			  const struct tc_action_ops *ops,
			  struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}

static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
			  struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tcf_idr_search(tn, a, index);
}

static struct tc_action_ops act_nat_ops = {
	.kind		=	"nat",
	.type		=	TCA_ACT_NAT,
	.owner		=	THIS_MODULE,
	.act		=	tcf_nat,
	.dump		=	tcf_nat_dump,
	.init		=	tcf_nat_init,
	.walk		=	tcf_nat_walker,
	.lookup		=	tcf_nat_search,
	.size		=	sizeof(struct tcf_nat),
};
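
/*
 * Allocate and free the per-netns action table as network namespaces come
 * and go; teardown runs batched over a list of exiting namespaces.
 */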
static __net_init int nat_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, nat_net_id);

	return tc_action_net_init(tn, &act_nat_ops);
}

static void __net_exit nat_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, nat_net_id);
}

static struct pernet_operations nat_net_ops = {
	.init = nat_init_net,
	.exit_batch = nat_exit_net,
	.id   = &nat_net_id,
	.size = sizeof(struct tc_action_net),
};

MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");

static int __init nat_init_module(void)
{
	return tcf_register_action(&act_nat_ops, &nat_net_ops);
}

static void __exit nat_cleanup_module(void)
{
	tcf_unregister_action(&act_nat_ops, &nat_net_ops);
}

module_init(nat_init_module);
module_exit(nat_cleanup_module);