2005-04-16 15:20:36 -07:00
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Version:	$Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
 *		Julian Anastasov <ja@ssi.bg>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
2005-12-27 02:43:12 -02:00
# include <linux/in.h>
# include <linux/ip.h>
2005-04-16 15:20:36 -07:00
# include <linux/kernel.h>
# include <linux/netfilter_ipv4.h>
2005-12-27 02:43:12 -02:00
# include <linux/udp.h>
2005-04-16 15:20:36 -07:00
# include <net/ip_vs.h>
static struct ip_vs_conn *
udp_conn_in_get ( const struct sk_buff * skb , struct ip_vs_protocol * pp ,
const struct iphdr * iph , unsigned int proto_off , int inverse )
{
struct ip_vs_conn * cp ;
__u16 _ports [ 2 ] , * pptr ;
pptr = skb_header_pointer ( skb , proto_off , sizeof ( _ports ) , _ports ) ;
if ( pptr = = NULL )
return NULL ;
if ( likely ( ! inverse ) ) {
cp = ip_vs_conn_in_get ( iph - > protocol ,
iph - > saddr , pptr [ 0 ] ,
iph - > daddr , pptr [ 1 ] ) ;
} else {
cp = ip_vs_conn_in_get ( iph - > protocol ,
iph - > daddr , pptr [ 1 ] ,
iph - > saddr , pptr [ 0 ] ) ;
}
return cp ;
}
static struct ip_vs_conn *
udp_conn_out_get ( const struct sk_buff * skb , struct ip_vs_protocol * pp ,
const struct iphdr * iph , unsigned int proto_off , int inverse )
{
struct ip_vs_conn * cp ;
__u16 _ports [ 2 ] , * pptr ;
pptr = skb_header_pointer ( skb , skb - > nh . iph - > ihl * 4 ,
sizeof ( _ports ) , _ports ) ;
if ( pptr = = NULL )
return NULL ;
if ( likely ( ! inverse ) ) {
cp = ip_vs_conn_out_get ( iph - > protocol ,
iph - > saddr , pptr [ 0 ] ,
iph - > daddr , pptr [ 1 ] ) ;
} else {
cp = ip_vs_conn_out_get ( iph - > protocol ,
iph - > daddr , pptr [ 1 ] ,
iph - > saddr , pptr [ 0 ] ) ;
}
return cp ;
}
static int
udp_conn_schedule ( struct sk_buff * skb , struct ip_vs_protocol * pp ,
int * verdict , struct ip_vs_conn * * cpp )
{
struct ip_vs_service * svc ;
struct udphdr _udph , * uh ;
uh = skb_header_pointer ( skb , skb - > nh . iph - > ihl * 4 ,
sizeof ( _udph ) , & _udph ) ;
if ( uh = = NULL ) {
* verdict = NF_DROP ;
return 0 ;
}
if ( ( svc = ip_vs_service_get ( skb - > nfmark , skb - > nh . iph - > protocol ,
skb - > nh . iph - > daddr , uh - > dest ) ) ) {
if ( ip_vs_todrop ( ) ) {
/*
* It seems that we are very loaded .
* We have to drop this packet : (
*/
ip_vs_service_put ( svc ) ;
* verdict = NF_DROP ;
return 0 ;
}
/*
* Let the virtual server select a real server for the
* incoming connection , and create a connection entry .
*/
* cpp = ip_vs_schedule ( svc , skb ) ;
if ( ! * cpp ) {
* verdict = ip_vs_leave ( svc , skb , pp ) ;
return 0 ;
}
ip_vs_service_put ( svc ) ;
}
return 1 ;
}
/*
 * Incrementally adjust uhdr->check for a changed address and port.
 *
 * @uhdr:    UDP header whose check field is updated in place
 * @oldip:   address being replaced
 * @newip:   replacement address
 * @oldport: port being replaced
 * @newport: replacement port
 *
 * The port difference is applied first and the address difference on
 * top of it, both through ip_vs_check_diff().  A result of zero is
 * stored as 0xFFFF; the NAT handlers in this file treat a zero check
 * field as a separate case.
 */
static inline void
udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
		     u16 oldport, u16 newport)
{
	u16 sum;

	sum = ip_vs_check_diff(oldport ^ 0xFFFF, newport, uhdr->check);
	sum = ip_vs_check_diff(~oldip, newip, sum);

	uhdr->check = sum ? sum : 0xFFFF;
}
static int
udp_snat_handler ( struct sk_buff * * pskb ,
struct ip_vs_protocol * pp , struct ip_vs_conn * cp )
{
struct udphdr * udph ;
unsigned int udphoff = ( * pskb ) - > nh . iph - > ihl * 4 ;
/* csum_check requires unshared skb */
if ( ! ip_vs_make_skb_writable ( pskb , udphoff + sizeof ( * udph ) ) )
return 0 ;
if ( unlikely ( cp - > app ! = NULL ) ) {
/* Some checks before mangling */
if ( pp - > csum_check & & ! pp - > csum_check ( * pskb , pp ) )
return 0 ;
/*
* Call application helper if needed
*/
if ( ! ip_vs_app_pkt_out ( cp , pskb ) )
return 0 ;
}
udph = ( void * ) ( * pskb ) - > nh . iph + udphoff ;
udph - > source = cp - > vport ;
/*
* Adjust UDP checksums
*/
if ( ! cp - > app & & ( udph - > check ! = 0 ) ) {
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update ( udph , cp - > daddr , cp - > vaddr ,
cp - > dport , cp - > vport ) ;
if ( ( * pskb ) - > ip_summed = = CHECKSUM_HW )
( * pskb ) - > ip_summed = CHECKSUM_NONE ;
} else {
/* full checksum calculation */
udph - > check = 0 ;
( * pskb ) - > csum = skb_checksum ( * pskb , udphoff ,
( * pskb ) - > len - udphoff , 0 ) ;
udph - > check = csum_tcpudp_magic ( cp - > vaddr , cp - > caddr ,
( * pskb ) - > len - udphoff ,
cp - > protocol ,
( * pskb ) - > csum ) ;
if ( udph - > check = = 0 )
udph - > check = 0xFFFF ;
IP_VS_DBG ( 11 , " O-pkt: %s O-csum=%d (+%zd) \n " ,
pp - > name , udph - > check ,
( char * ) & ( udph - > check ) - ( char * ) udph ) ;
}
return 1 ;
}
static int
udp_dnat_handler ( struct sk_buff * * pskb ,
struct ip_vs_protocol * pp , struct ip_vs_conn * cp )
{
struct udphdr * udph ;
unsigned int udphoff = ( * pskb ) - > nh . iph - > ihl * 4 ;
/* csum_check requires unshared skb */
if ( ! ip_vs_make_skb_writable ( pskb , udphoff + sizeof ( * udph ) ) )
return 0 ;
if ( unlikely ( cp - > app ! = NULL ) ) {
/* Some checks before mangling */
if ( pp - > csum_check & & ! pp - > csum_check ( * pskb , pp ) )
return 0 ;
/*
* Attempt ip_vs_app call .
* It will fix ip_vs_conn
*/
if ( ! ip_vs_app_pkt_in ( cp , pskb ) )
return 0 ;
}
udph = ( void * ) ( * pskb ) - > nh . iph + udphoff ;
udph - > dest = cp - > dport ;
/*
* Adjust UDP checksums
*/
if ( ! cp - > app & & ( udph - > check ! = 0 ) ) {
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update ( udph , cp - > vaddr , cp - > daddr ,
cp - > vport , cp - > dport ) ;
if ( ( * pskb ) - > ip_summed = = CHECKSUM_HW )
( * pskb ) - > ip_summed = CHECKSUM_NONE ;
} else {
/* full checksum calculation */
udph - > check = 0 ;
( * pskb ) - > csum = skb_checksum ( * pskb , udphoff ,
( * pskb ) - > len - udphoff , 0 ) ;
udph - > check = csum_tcpudp_magic ( cp - > caddr , cp - > daddr ,
( * pskb ) - > len - udphoff ,
cp - > protocol ,
( * pskb ) - > csum ) ;
if ( udph - > check = = 0 )
udph - > check = 0xFFFF ;
( * pskb ) - > ip_summed = CHECKSUM_UNNECESSARY ;
}
return 1 ;
}
static int
udp_csum_check ( struct sk_buff * skb , struct ip_vs_protocol * pp )
{
struct udphdr _udph , * uh ;
unsigned int udphoff = skb - > nh . iph - > ihl * 4 ;
uh = skb_header_pointer ( skb , udphoff , sizeof ( _udph ) , & _udph ) ;
if ( uh = = NULL )
return 0 ;
if ( uh - > check ! = 0 ) {
switch ( skb - > ip_summed ) {
case CHECKSUM_NONE :
skb - > csum = skb_checksum ( skb , udphoff ,
skb - > len - udphoff , 0 ) ;
case CHECKSUM_HW :
if ( csum_tcpudp_magic ( skb - > nh . iph - > saddr ,
skb - > nh . iph - > daddr ,
skb - > len - udphoff ,
skb - > nh . iph - > protocol ,
skb - > csum ) ) {
IP_VS_DBG_RL_PKT ( 0 , pp , skb , 0 ,
" Failed checksum for " ) ;
return 0 ;
}
break ;
default :
/* CHECKSUM_UNNECESSARY */
break ;
}
}
return 1 ;
}
/*
 * Registry of UDP application helpers.
 *
 * Note: the caller guarantees that only one of register_app,
 * unregister_app or app_conn_bind is called each time.
 *
 * Helpers are kept in UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) list
 * buckets selected by udp_app_hashkey().  Every walk or modification
 * of these lists in this file is done with udp_app_lock held.
 */
# define UDP_APP_TAB_BITS 4
# define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
# define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
static struct list_head udp_apps [ UDP_APP_TAB_SIZE ] ;
static DEFINE_SPINLOCK ( udp_app_lock ) ;
/*
 * Map a port value to a bucket index in udp_apps[].
 *
 * The port is XORed with itself shifted right by UDP_APP_TAB_BITS and
 * the result is masked with UDP_APP_TAB_MASK, so the return value is
 * always below UDP_APP_TAB_SIZE.
 */
static inline __u16 udp_app_hashkey(__u16 port)
{
	__u16 folded = port ^ (port >> UDP_APP_TAB_BITS);

	return folded & UDP_APP_TAB_MASK;
}
static int udp_register_app ( struct ip_vs_app * inc )
{
struct ip_vs_app * i ;
__u16 hash , port = inc - > port ;
int ret = 0 ;
hash = udp_app_hashkey ( port ) ;
spin_lock_bh ( & udp_app_lock ) ;
list_for_each_entry ( i , & udp_apps [ hash ] , p_list ) {
if ( i - > port = = port ) {
ret = - EEXIST ;
goto out ;
}
}
list_add ( & inc - > p_list , & udp_apps [ hash ] ) ;
atomic_inc ( & ip_vs_protocol_udp . appcnt ) ;
out :
spin_unlock_bh ( & udp_app_lock ) ;
return ret ;
}
static void
udp_unregister_app ( struct ip_vs_app * inc )
{
spin_lock_bh ( & udp_app_lock ) ;
atomic_dec ( & ip_vs_protocol_udp . appcnt ) ;
list_del ( & inc - > p_list ) ;
spin_unlock_bh ( & udp_app_lock ) ;
}
static int udp_app_conn_bind ( struct ip_vs_conn * cp )
{
int hash ;
struct ip_vs_app * inc ;
int result = 0 ;
/* Default binding: bind app only for NAT */
if ( IP_VS_FWD_METHOD ( cp ) ! = IP_VS_CONN_F_MASQ )
return 0 ;
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey ( cp - > vport ) ;
spin_lock ( & udp_app_lock ) ;
list_for_each_entry ( inc , & udp_apps [ hash ] , p_list ) {
if ( inc - > port = = cp - > vport ) {
if ( unlikely ( ! ip_vs_app_inc_get ( inc ) ) )
break ;
spin_unlock ( & udp_app_lock ) ;
IP_VS_DBG ( 9 , " %s: Binding conn %u.%u.%u.%u:%u-> "
" %u.%u.%u.%u:%u to app %s on port %u \n " ,
__FUNCTION__ ,
NIPQUAD ( cp - > caddr ) , ntohs ( cp - > cport ) ,
NIPQUAD ( cp - > vaddr ) , ntohs ( cp - > vport ) ,
inc - > name , ntohs ( inc - > port ) ) ;
cp - > app = inc ;
if ( inc - > init_conn )
result = inc - > init_conn ( inc , cp ) ;
goto out ;
}
}
spin_unlock ( & udp_app_lock ) ;
out :
return result ;
}
/*
 * Per-state timeout values, indexed by IP_VS_UDP_S_*.  udp_init()
 * installs this array as the protocol's timeout_table, and
 * udp_state_transition() loads the IP_VS_UDP_S_NORMAL entry into
 * cp->timeout.  Values are multiples of HZ (presumably jiffies, i.e.
 * 5 minutes and 2 seconds - confirm against the users of
 * timeout_table).
 */
static int udp_timeouts [ IP_VS_UDP_S_LAST + 1 ] = {
[ IP_VS_UDP_S_NORMAL ] = 5 * 60 * HZ ,
[ IP_VS_UDP_S_LAST ] = 2 * HZ ,
} ;
/*
 * State names, indexed by IP_VS_UDP_S_*.  Used by udp_state_name() and
 * passed to ip_vs_set_state_timeout() by udp_set_state_timeout().
 * udp_state_name() never returns the IP_VS_UDP_S_LAST entry because it
 * rejects that index first.
 */
static char * udp_state_name_table [ IP_VS_UDP_S_LAST + 1 ] = {
[ IP_VS_UDP_S_NORMAL ] = " UDP " ,
[ IP_VS_UDP_S_LAST ] = " BUG! " ,
} ;
static int
udp_set_state_timeout ( struct ip_vs_protocol * pp , char * sname , int to )
{
return ip_vs_set_state_timeout ( pp - > timeout_table , IP_VS_UDP_S_LAST ,
udp_state_name_table , sname , to ) ;
}
/*
 * Return the printable name of a UDP connection state.
 *
 * @state: state index, expected to be in [0, IP_VS_UDP_S_LAST)
 *
 * Any index outside that range - including negative values, which
 * would otherwise read before the start of udp_state_name_table - yields
 * the error string.  A valid index without a table entry yields the
 * placeholder string.
 */
static const char *udp_state_name(int state)
{
	if (state < 0 || state >= IP_VS_UDP_S_LAST)
		return " ERR! ";
	return udp_state_name_table[state] ? udp_state_name_table[state] : " ? ";
}
static int
udp_state_transition ( struct ip_vs_conn * cp , int direction ,
const struct sk_buff * skb ,
struct ip_vs_protocol * pp )
{
cp - > timeout = pp - > timeout_table [ IP_VS_UDP_S_NORMAL ] ;
return 1 ;
}
static void udp_init ( struct ip_vs_protocol * pp )
{
IP_VS_INIT_HASH_TABLE ( udp_apps ) ;
pp - > timeout_table = udp_timeouts ;
}
/*
 * Protocol exit hook: intentionally empty, udp_init() sets up nothing
 * that is torn down here.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
/*
 * UDP protocol descriptor: wires the handlers defined in this file
 * into the generic IPVS protocol interface.
 */
struct ip_vs_protocol ip_vs_protocol_udp = {
	/* identification */
	.name			= " UDP ",
	.protocol		= IPPROTO_UDP,
	.dont_defrag		= 0,

	/* lifetime */
	.init			= udp_init,
	.exit			= udp_exit,

	/* connection scheduling and lookup */
	.conn_schedule		= udp_conn_schedule,
	.conn_in_get		= udp_conn_in_get,
	.conn_out_get		= udp_conn_out_get,

	/* packet mangling and checksum verification */
	.snat_handler		= udp_snat_handler,
	.dnat_handler		= udp_dnat_handler,
	.csum_check		= udp_csum_check,

	/* connection state */
	.state_transition	= udp_state_transition,
	.state_name		= udp_state_name,

	/* application helpers */
	.register_app		= udp_register_app,
	.unregister_app		= udp_unregister_app,
	.app_conn_bind		= udp_app_conn_bind,

	/* debugging and timeout management */
	.debug_packet		= ip_vs_tcpudp_debug_packet,
	.timeout_change		= NULL,
	.set_state_timeout	= udp_set_state_timeout,
};