2005-04-16 15:20:36 -07:00
/*
* Anycast support for IPv6
2007-02-09 23:24:49 +09:00
* Linux INET6 implementation
2005-04-16 15:20:36 -07:00
*
* Authors:
* David L Stevens (dlstevens@us.ibm.com)
*
* based heavily on net/ipv6/mcast.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
2006-01-11 12:17:47 -08:00
# include <linux/capability.h>
2005-04-16 15:20:36 -07:00
# include <linux/module.h>
# include <linux/errno.h>
# include <linux/types.h>
# include <linux/random.h>
# include <linux/string.h>
# include <linux/socket.h>
# include <linux/sockios.h>
# include <linux/net.h>
# include <linux/in6.h>
# include <linux/netdevice.h>
# include <linux/if_arp.h>
# include <linux/route.h>
# include <linux/init.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2007-09-12 12:01:34 +02:00
# include <net/net_namespace.h>
2005-04-16 15:20:36 -07:00
# include <net/sock.h>
# include <net/snmp.h>
# include <net/ipv6.h>
# include <net/protocol.h>
# include <net/if_inet6.h>
# include <net/ndisc.h>
# include <net/addrconf.h>
# include <net/ip6_route.h>
# include <net/checksum.h>
/* Forward declaration; the definition appears further down in this file. */
static int ipv6_dev_ac_dec ( struct net_device * dev , struct in6_addr * addr ) ;
/*
 * Big ac list lock for all the sockets: taken for writing around every
 * update of a socket's np->ipv6_ac_list in this file.
 */
static DEFINE_RWLOCK ( ipv6_sk_ac_lock ) ;
/*
* socket join an anycast group
*/
int ipv6_sock_ac_join ( struct sock * sk , int ifindex , struct in6_addr * addr )
{
struct ipv6_pinfo * np = inet6_sk ( sk ) ;
struct net_device * dev = NULL ;
struct inet6_dev * idev ;
struct ipv6_ac_socklist * pac ;
2008-03-26 16:52:32 -07:00
struct net * net = sock_net ( sk ) ;
2008-07-19 22:35:03 -07:00
int ishost = ! net - > ipv6 . devconf_all - > forwarding ;
2005-04-16 15:20:36 -07:00
int err = 0 ;
if ( ! capable ( CAP_NET_ADMIN ) )
return - EPERM ;
if ( ipv6_addr_is_multicast ( addr ) )
return - EINVAL ;
2008-03-26 16:52:32 -07:00
if ( ipv6_chk_addr ( net , addr , NULL , 0 ) )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
pac = sock_kmalloc ( sk , sizeof ( struct ipv6_ac_socklist ) , GFP_KERNEL ) ;
if ( pac = = NULL )
return - ENOMEM ;
pac - > acl_next = NULL ;
ipv6_addr_copy ( & pac - > acl_addr , addr ) ;
if ( ifindex = = 0 ) {
struct rt6_info * rt ;
2008-03-26 16:52:32 -07:00
rt = rt6_lookup ( net , addr , NULL , 0 , 0 ) ;
2005-04-16 15:20:36 -07:00
if ( rt ) {
dev = rt - > rt6i_dev ;
dev_hold ( dev ) ;
dst_release ( & rt - > u . dst ) ;
} else if ( ishost ) {
err = - EADDRNOTAVAIL ;
goto out_free_pac ;
} else {
/* router, no matching interface: just pick one */
2008-03-26 16:52:32 -07:00
dev = dev_get_by_flags ( net , IFF_UP , IFF_UP | IFF_LOOPBACK ) ;
2005-04-16 15:20:36 -07:00
}
} else
2008-03-26 16:52:32 -07:00
dev = dev_get_by_index ( net , ifindex ) ;
2005-04-16 15:20:36 -07:00
if ( dev = = NULL ) {
err = - ENODEV ;
goto out_free_pac ;
}
idev = in6_dev_get ( dev ) ;
if ( ! idev ) {
if ( ifindex )
err = - ENODEV ;
else
err = - EADDRNOTAVAIL ;
goto out_dev_put ;
}
/* reset ishost, now that we have a specific device */
ishost = ! idev - > cnf . forwarding ;
in6_dev_put ( idev ) ;
pac - > acl_ifindex = dev - > ifindex ;
/* XXX
* For hosts , allow link - local or matching prefix anycasts .
* This obviates the need for propagating anycast routes while
* still allowing some non - router anycast participation .
*/
2008-03-15 22:54:23 -04:00
if ( ! ipv6_chk_prefix ( addr , dev ) ) {
2005-04-16 15:20:36 -07:00
if ( ishost )
err = - EADDRNOTAVAIL ;
if ( err )
goto out_dev_put ;
}
err = ipv6_dev_ac_inc ( dev , addr ) ;
if ( err )
goto out_dev_put ;
write_lock_bh ( & ipv6_sk_ac_lock ) ;
pac - > acl_next = np - > ipv6_ac_list ;
np - > ipv6_ac_list = pac ;
write_unlock_bh ( & ipv6_sk_ac_lock ) ;
dev_put ( dev ) ;
return 0 ;
out_dev_put :
dev_put ( dev ) ;
out_free_pac :
sock_kfree_s ( sk , pac , sizeof ( * pac ) ) ;
return err ;
}
/*
* socket leave an anycast group
*/
int ipv6_sock_ac_drop ( struct sock * sk , int ifindex , struct in6_addr * addr )
{
struct ipv6_pinfo * np = inet6_sk ( sk ) ;
struct net_device * dev ;
struct ipv6_ac_socklist * pac , * prev_pac ;
2008-03-26 16:52:32 -07:00
struct net * net = sock_net ( sk ) ;
2005-04-16 15:20:36 -07:00
write_lock_bh ( & ipv6_sk_ac_lock ) ;
prev_pac = NULL ;
for ( pac = np - > ipv6_ac_list ; pac ; pac = pac - > acl_next ) {
if ( ( ifindex = = 0 | | pac - > acl_ifindex = = ifindex ) & &
ipv6_addr_equal ( & pac - > acl_addr , addr ) )
break ;
prev_pac = pac ;
}
if ( ! pac ) {
write_unlock_bh ( & ipv6_sk_ac_lock ) ;
return - ENOENT ;
}
if ( prev_pac )
prev_pac - > acl_next = pac - > acl_next ;
else
np - > ipv6_ac_list = pac - > acl_next ;
write_unlock_bh ( & ipv6_sk_ac_lock ) ;
2008-03-26 16:52:32 -07:00
dev = dev_get_by_index ( net , pac - > acl_ifindex ) ;
2005-04-16 15:20:36 -07:00
if ( dev ) {
ipv6_dev_ac_dec ( dev , & pac - > acl_addr ) ;
dev_put ( dev ) ;
}
sock_kfree_s ( sk , pac , sizeof ( * pac ) ) ;
return 0 ;
}
void ipv6_sock_ac_close ( struct sock * sk )
{
struct ipv6_pinfo * np = inet6_sk ( sk ) ;
struct net_device * dev = NULL ;
struct ipv6_ac_socklist * pac ;
2008-03-26 16:52:32 -07:00
struct net * net = sock_net ( sk ) ;
2005-04-16 15:20:36 -07:00
int prev_index ;
write_lock_bh ( & ipv6_sk_ac_lock ) ;
pac = np - > ipv6_ac_list ;
np - > ipv6_ac_list = NULL ;
write_unlock_bh ( & ipv6_sk_ac_lock ) ;
prev_index = 0 ;
while ( pac ) {
struct ipv6_ac_socklist * next = pac - > acl_next ;
if ( pac - > acl_ifindex ! = prev_index ) {
if ( dev )
dev_put ( dev ) ;
2008-03-26 16:52:32 -07:00
dev = dev_get_by_index ( net , pac - > acl_ifindex ) ;
2005-04-16 15:20:36 -07:00
prev_index = pac - > acl_ifindex ;
}
if ( dev )
ipv6_dev_ac_dec ( dev , & pac - > acl_addr ) ;
sock_kfree_s ( sk , pac , sizeof ( * pac ) ) ;
pac = next ;
}
if ( dev )
dev_put ( dev ) ;
}
#if 0
/* This function is not used, which is funny.  Apparently the author
 * intended to use it to filter out datagrams inside udp/raw but forgot.
 *
 * That is OK: anycasts are not special compared to delivery to unicasts.
 *
 * Returns non-zero when the socket's anycast list holds addr on the given
 * interface (any interface when ifindex is 0), zero otherwise.
 */
int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_ac_socklist *pac;
	int found = 0;

	read_lock(&ipv6_sk_ac_lock);
	for (pac = np->ipv6_ac_list; pac && !found; pac = pac->acl_next) {
		if (ifindex == 0 || pac->acl_ifindex == ifindex)
			found = ipv6_addr_equal(&pac->acl_addr, addr);
	}
	read_unlock(&ipv6_sk_ac_lock);

	return found;
}
#endif
static void aca_put ( struct ifacaddr6 * ac )
{
if ( atomic_dec_and_test ( & ac - > aca_refcnt ) ) {
in6_dev_put ( ac - > aca_idev ) ;
dst_release ( & ac - > aca_rt - > u . dst ) ;
kfree ( ac ) ;
}
}
/*
* device anycast group inc ( add if not found )
*/
int ipv6_dev_ac_inc ( struct net_device * dev , struct in6_addr * addr )
{
struct ifacaddr6 * aca ;
struct inet6_dev * idev ;
struct rt6_info * rt ;
int err ;
idev = in6_dev_get ( dev ) ;
if ( idev = = NULL )
return - EINVAL ;
write_lock_bh ( & idev - > lock ) ;
if ( idev - > dead ) {
err = - ENODEV ;
goto out ;
}
for ( aca = idev - > ac_list ; aca ; aca = aca - > aca_next ) {
if ( ipv6_addr_equal ( & aca - > aca_addr , addr ) ) {
aca - > aca_users + + ;
err = 0 ;
goto out ;
}
}
/*
* not found : create a new one .
*/
2006-03-20 23:01:32 -08:00
aca = kzalloc ( sizeof ( struct ifacaddr6 ) , GFP_ATOMIC ) ;
2005-04-16 15:20:36 -07:00
if ( aca = = NULL ) {
err = - ENOMEM ;
goto out ;
}
rt = addrconf_dst_alloc ( idev , addr , 1 ) ;
if ( IS_ERR ( rt ) ) {
kfree ( aca ) ;
err = PTR_ERR ( rt ) ;
goto out ;
}
ipv6_addr_copy ( & aca - > aca_addr , addr ) ;
aca - > aca_idev = idev ;
aca - > aca_rt = rt ;
aca - > aca_users = 1 ;
/* aca_tstamp should be updated upon changes */
aca - > aca_cstamp = aca - > aca_tstamp = jiffies ;
atomic_set ( & aca - > aca_refcnt , 2 ) ;
spin_lock_init ( & aca - > aca_lock ) ;
aca - > aca_next = idev - > ac_list ;
idev - > ac_list = aca ;
write_unlock_bh ( & idev - > lock ) ;
2008-04-03 13:33:00 -07:00
ip6_ins_rt ( rt ) ;
2005-04-16 15:20:36 -07:00
addrconf_join_solict ( dev , & aca - > aca_addr ) ;
aca_put ( aca ) ;
return 0 ;
out :
write_unlock_bh ( & idev - > lock ) ;
in6_dev_put ( idev ) ;
return err ;
}
/*
* device anycast group decrement
*/
int __ipv6_dev_ac_dec ( struct inet6_dev * idev , struct in6_addr * addr )
{
struct ifacaddr6 * aca , * prev_aca ;
write_lock_bh ( & idev - > lock ) ;
prev_aca = NULL ;
for ( aca = idev - > ac_list ; aca ; aca = aca - > aca_next ) {
if ( ipv6_addr_equal ( & aca - > aca_addr , addr ) )
break ;
prev_aca = aca ;
}
if ( ! aca ) {
write_unlock_bh ( & idev - > lock ) ;
return - ENOENT ;
}
if ( - - aca - > aca_users > 0 ) {
write_unlock_bh ( & idev - > lock ) ;
return 0 ;
}
if ( prev_aca )
prev_aca - > aca_next = aca - > aca_next ;
else
idev - > ac_list = aca - > aca_next ;
write_unlock_bh ( & idev - > lock ) ;
addrconf_leave_solict ( idev , & aca - > aca_addr ) ;
dst_hold ( & aca - > aca_rt - > u . dst ) ;
2008-04-03 13:33:00 -07:00
ip6_del_rt ( aca - > aca_rt ) ;
2005-04-16 15:20:36 -07:00
aca_put ( aca ) ;
return 0 ;
}
/*
 * Net-device wrapper around __ipv6_dev_ac_dec(): takes a reference on the
 * device's inet6_dev for the duration of the call.
 *
 * Returns -ENODEV if dev has no inet6_dev, otherwise the result of
 * __ipv6_dev_ac_dec().
 */
static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
{
	struct inet6_dev *idev = in6_dev_get(dev);
	int err;

	if (!idev)
		return -ENODEV;

	err = __ipv6_dev_ac_dec(idev, addr);
	in6_dev_put(idev);
	return err;
}
2007-02-09 23:24:49 +09:00
2005-04-16 15:20:36 -07:00
/*
* check if the interface has this anycast address
*/
static int ipv6_chk_acast_dev ( struct net_device * dev , struct in6_addr * addr )
{
struct inet6_dev * idev ;
struct ifacaddr6 * aca ;
idev = in6_dev_get ( dev ) ;
if ( idev ) {
read_lock_bh ( & idev - > lock ) ;
for ( aca = idev - > ac_list ; aca ; aca = aca - > aca_next )
if ( ipv6_addr_equal ( & aca - > aca_addr , addr ) )
break ;
read_unlock_bh ( & idev - > lock ) ;
in6_dev_put ( idev ) ;
2007-10-09 01:59:42 -07:00
return aca ! = NULL ;
2005-04-16 15:20:36 -07:00
}
return 0 ;
}
/*
* check if given interface ( or any , if dev = = 0 ) has this anycast address
*/
2008-03-26 16:52:32 -07:00
int ipv6_chk_acast_addr ( struct net * net , struct net_device * dev ,
struct in6_addr * addr )
2005-04-16 15:20:36 -07:00
{
2007-05-03 15:13:45 -07:00
int found = 0 ;
2005-04-16 15:20:36 -07:00
if ( dev )
return ipv6_chk_acast_dev ( dev , addr ) ;
2009-11-04 05:43:23 -08:00
rcu_read_lock ( ) ;
for_each_netdev_rcu ( net , dev )
2007-05-03 15:13:45 -07:00
if ( ipv6_chk_acast_dev ( dev , addr ) ) {
found = 1 ;
2005-04-16 15:20:36 -07:00
break ;
2007-05-03 15:13:45 -07:00
}
2009-11-04 05:43:23 -08:00
rcu_read_unlock ( ) ;
2007-05-03 15:13:45 -07:00
return found ;
2005-04-16 15:20:36 -07:00
}
# ifdef CONFIG_PROC_FS
struct ac6_iter_state {
2008-03-26 16:52:32 -07:00
struct seq_net_private p ;
2005-04-16 15:20:36 -07:00
struct net_device * dev ;
struct inet6_dev * idev ;
} ;
/* Cast seq->private to the anycast iterator state. */
# define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
static inline struct ifacaddr6 * ac6_get_first ( struct seq_file * seq )
{
struct ifacaddr6 * im = NULL ;
struct ac6_iter_state * state = ac6_seq_private ( seq ) ;
2008-03-26 16:52:32 -07:00
struct net * net = seq_file_net ( seq ) ;
2005-04-16 15:20:36 -07:00
2007-05-03 15:13:45 -07:00
state - > idev = NULL ;
2008-03-26 16:52:32 -07:00
for_each_netdev ( net , state - > dev ) {
2005-04-16 15:20:36 -07:00
struct inet6_dev * idev ;
idev = in6_dev_get ( state - > dev ) ;
if ( ! idev )
continue ;
read_lock_bh ( & idev - > lock ) ;
im = idev - > ac_list ;
if ( im ) {
state - > idev = idev ;
break ;
}
read_unlock_bh ( & idev - > lock ) ;
2007-02-26 16:28:56 -08:00
in6_dev_put ( idev ) ;
2005-04-16 15:20:36 -07:00
}
return im ;
}
static struct ifacaddr6 * ac6_get_next ( struct seq_file * seq , struct ifacaddr6 * im )
{
struct ac6_iter_state * state = ac6_seq_private ( seq ) ;
im = im - > aca_next ;
while ( ! im ) {
if ( likely ( state - > idev ! = NULL ) ) {
read_unlock_bh ( & state - > idev - > lock ) ;
in6_dev_put ( state - > idev ) ;
}
2007-05-03 15:13:45 -07:00
state - > dev = next_net_device ( state - > dev ) ;
2005-04-16 15:20:36 -07:00
if ( ! state - > dev ) {
state - > idev = NULL ;
break ;
}
state - > idev = in6_dev_get ( state - > dev ) ;
if ( ! state - > idev )
continue ;
read_lock_bh ( & state - > idev - > lock ) ;
im = state - > idev - > ac_list ;
}
return im ;
}
/*
 * Return the entry at zero-based position pos of the iteration, or NULL
 * when the iteration holds fewer than pos + 1 entries.
 */
static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ifacaddr6 *aca = ac6_get_first(seq);

	while (aca && pos) {
		aca = ac6_get_next(seq, aca);
		if (!aca)
			break;
		--pos;
	}
	return pos ? NULL : aca;
}
/*
 * seq_file ->start: take dev_base_lock for reading and position the
 * iterator at *pos.  The lock is released in ac6_seq_stop().
 */
static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(dev_base_lock)
{
	loff_t idx;

	read_lock(&dev_base_lock);
	idx = *pos;
	return ac6_get_idx(seq, idx);
}
/* seq_file ->next: step to the entry after v and bump the position. */
static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ifacaddr6 *aca = ac6_get_next(seq, v);

	(*pos)++;
	return aca;
}
static void ac6_seq_stop ( struct seq_file * seq , void * v )
2008-01-01 21:58:02 -08:00
__releases ( dev_base_lock )
2005-04-16 15:20:36 -07:00
{
struct ac6_iter_state * state = ac6_seq_private ( seq ) ;
if ( likely ( state - > idev ! = NULL ) ) {
read_unlock_bh ( & state - > idev - > lock ) ;
in6_dev_put ( state - > idev ) ;
}
read_unlock ( & dev_base_lock ) ;
}
static int ac6_seq_show ( struct seq_file * seq , void * v )
{
struct ifacaddr6 * im = ( struct ifacaddr6 * ) v ;
struct ac6_iter_state * state = ac6_seq_private ( seq ) ;
2008-10-29 12:50:24 -07:00
seq_printf ( seq , " %-4d %-15s %pi6 %5d \n " ,
2005-04-16 15:20:36 -07:00
state - > dev - > ifindex , state - > dev - > name ,
2008-10-28 16:05:40 -07:00
& im - > aca_addr , im - > aca_users ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2007-07-10 23:07:31 -07:00
static const struct seq_operations ac6_seq_ops = {
2005-04-16 15:20:36 -07:00
. start = ac6_seq_start ,
. next = ac6_seq_next ,
. stop = ac6_seq_stop ,
. show = ac6_seq_show ,
} ;
static int ac6_seq_open ( struct inode * inode , struct file * file )
{
2008-03-26 16:52:32 -07:00
return seq_open_net ( inode , file , & ac6_seq_ops ,
sizeof ( struct ac6_iter_state ) ) ;
2005-04-16 15:20:36 -07:00
}
2007-02-12 00:55:35 -08:00
static const struct file_operations ac6_seq_fops = {
2005-04-16 15:20:36 -07:00
. owner = THIS_MODULE ,
. open = ac6_seq_open ,
. read = seq_read ,
. llseek = seq_lseek ,
2008-03-26 16:52:32 -07:00
. release = seq_release_net ,
2005-04-16 15:20:36 -07:00
} ;
2008-03-26 16:52:32 -07:00
int ac6_proc_init ( struct net * net )
2005-04-16 15:20:36 -07:00
{
2008-03-26 16:52:32 -07:00
if ( ! proc_net_fops_create ( net , " anycast6 " , S_IRUGO , & ac6_seq_fops ) )
2005-04-16 15:20:36 -07:00
return - ENOMEM ;
return 0 ;
}
2008-03-26 16:52:32 -07:00
/* Remove the anycast proc entry of a network namespace. */
void ac6_proc_exit(struct net *net)
{
	/* NOTE(review): name literal kept exactly as found; the surrounding
	 * spaces may be a formatting artifact - confirm, and keep it in sync
	 * with ac6_proc_init().
	 */
	proc_net_remove(net, " anycast6 ");
}
# endif