2007-09-12 11:50:50 +02:00
# include <linux/workqueue.h>
# include <linux/rtnetlink.h>
# include <linux/cache.h>
# include <linux/slab.h>
# include <linux/list.h>
# include <linux/delay.h>
2007-09-26 22:04:26 -07:00
# include <linux/sched.h>
2008-04-15 00:35:23 -07:00
# include <linux/idr.h>
2009-07-10 09:51:33 +00:00
# include <linux/rculist.h>
2009-07-10 09:51:35 +00:00
# include <linux/nsproxy.h>
2007-09-12 11:50:50 +02:00
# include <net/net_namespace.h>
2008-04-15 00:36:08 -07:00
# include <net/netns/generic.h>
2007-09-12 11:50:50 +02:00
/*
* Our network namespace constructor / destructor lists
*/
/*
 * Registered pernet_operations.  Subsystems are inserted ahead of
 * first_device, devices are appended at the tail.
 */
static LIST_HEAD ( pernet_list ) ;
/*
 * Entry of the first registered device in pernet_list; equals
 * &pernet_list for as long as no device has been registered.
 */
static struct list_head * first_device = & pernet_list ;
/* Held around namespace setup/cleanup and pernet (un)registration. */
static DEFINE_MUTEX ( net_mutex ) ;
/*
 * Namespaces that completed setup_net().  Entries are added and removed
 * under rtnl_lock with the RCU list primitives.
 */
LIST_HEAD ( net_namespace_list ) ;
2008-10-08 11:35:06 +02:00
EXPORT_SYMBOL_GPL ( net_namespace_list ) ;
2007-09-12 11:50:50 +02:00
struct net init_net ;
2008-01-22 22:05:33 -08:00
EXPORT_SYMBOL ( init_net ) ;
2007-09-12 11:50:50 +02:00
2008-04-15 00:36:08 -07:00
# define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
2010-04-23 01:40:47 +00:00
/*
 * RCU callback handed to call_rcu(): frees the net_generic array that
 * embeds @rcu.
 */
static void net_generic_release(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct net_generic, rcu));
}
static int net_assign_generic ( struct net * net , int id , void * data )
{
struct net_generic * ng , * old_ng ;
BUG_ON ( ! mutex_is_locked ( & net_mutex ) ) ;
BUG_ON ( id = = 0 ) ;
ng = old_ng = net - > gen ;
if ( old_ng - > len > = id )
goto assign ;
ng = kzalloc ( sizeof ( struct net_generic ) +
id * sizeof ( void * ) , GFP_KERNEL ) ;
if ( ng = = NULL )
return - ENOMEM ;
/*
* Some synchronisation notes :
*
* The net_generic explores the net - > gen array inside rcu
* read section . Besides once set the net - > gen - > ptr [ x ]
* pointer never changes ( see rules in netns / generic . h ) .
*
* That said , we simply duplicate this array and schedule
* the old copy for kfree after a grace period .
*/
ng - > len = id ;
memcpy ( & ng - > ptr , & old_ng - > ptr , old_ng - > len * sizeof ( void * ) ) ;
rcu_assign_pointer ( net - > gen , ng ) ;
call_rcu ( & old_ng - > rcu , net_generic_release ) ;
assign :
ng - > ptr [ id - 1 ] = data ;
return 0 ;
}
2009-11-29 22:25:28 +00:00
static int ops_init ( const struct pernet_operations * ops , struct net * net )
{
int err ;
if ( ops - > id & & ops - > size ) {
void * data = kzalloc ( ops - > size , GFP_KERNEL ) ;
if ( ! data )
return - ENOMEM ;
err = net_assign_generic ( net , * ops - > id , data ) ;
if ( err ) {
kfree ( data ) ;
return err ;
}
}
if ( ops - > init )
return ops - > init ( net ) ;
return 0 ;
}
/*
 * Free the per-namespace buffer stored for @ops in @net.  Does nothing
 * unless @ops declares both an id and a size.
 */
static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (!ops->id || !ops->size)
		return;

	kfree(net_generic(net, *ops->id));
}
2009-12-03 02:29:03 +00:00
static void ops_exit_list ( const struct pernet_operations * ops ,
struct list_head * net_exit_list )
{
struct net * net ;
if ( ops - > exit ) {
list_for_each_entry ( net , net_exit_list , exit_list )
ops - > exit ( net ) ;
}
if ( ops - > exit_batch )
ops - > exit_batch ( net_exit_list ) ;
}
static void ops_free_list ( const struct pernet_operations * ops ,
struct list_head * net_exit_list )
{
struct net * net ;
if ( ops - > size & & ops - > id ) {
list_for_each_entry ( net , net_exit_list , exit_list )
ops_free ( ops , net ) ;
}
}
2007-09-12 11:50:50 +02:00
/*
* setup_net runs the initializers for the network namespace object .
*/
2007-11-01 00:45:59 -07:00
static __net_init int setup_net ( struct net * net )
2007-09-12 11:50:50 +02:00
{
/* Must be called with net_mutex held */
2009-11-29 22:25:28 +00:00
const struct pernet_operations * ops , * saved_ops ;
2009-02-22 00:07:53 -08:00
int error = 0 ;
2009-12-03 02:29:03 +00:00
LIST_HEAD ( net_exit_list ) ;
2007-09-12 11:50:50 +02:00
atomic_set ( & net - > count , 1 ) ;
2009-02-22 00:07:53 -08:00
2008-04-16 01:58:04 -07:00
# ifdef NETNS_REFCNT_DEBUG
2007-09-12 11:50:50 +02:00
atomic_set ( & net - > use_count , 0 ) ;
2008-04-16 01:58:04 -07:00
# endif
2007-09-12 11:50:50 +02:00
2007-09-18 13:20:41 -07:00
list_for_each_entry ( ops , & pernet_list , list ) {
2009-11-29 22:25:28 +00:00
error = ops_init ( ops , net ) ;
if ( error < 0 )
goto out_undo ;
2007-09-12 11:50:50 +02:00
}
out :
return error ;
2007-09-18 13:20:41 -07:00
2007-09-12 11:50:50 +02:00
out_undo :
/* Walk through the list backwards calling the exit functions
* for the pernet modules whose init functions did not fail .
*/
2009-12-03 02:29:03 +00:00
list_add ( & net - > exit_list , & net_exit_list ) ;
2009-11-29 22:25:28 +00:00
saved_ops = ops ;
2009-12-03 02:29:03 +00:00
list_for_each_entry_continue_reverse ( ops , & pernet_list , list )
ops_exit_list ( ops , & net_exit_list ) ;
2009-11-29 22:25:28 +00:00
ops = saved_ops ;
list_for_each_entry_continue_reverse ( ops , & pernet_list , list )
2009-12-03 02:29:03 +00:00
ops_free_list ( ops , & net_exit_list ) ;
2007-10-30 15:38:57 -07:00
rcu_barrier ( ) ;
2007-09-12 11:50:50 +02:00
goto out ;
}
2009-02-22 00:07:53 -08:00
static struct net_generic * net_alloc_generic ( void )
2007-11-01 00:44:50 -07:00
{
2009-02-22 00:07:53 -08:00
struct net_generic * ng ;
size_t generic_size = sizeof ( struct net_generic ) +
INITIAL_NET_GEN_PTRS * sizeof ( void * ) ;
ng = kzalloc ( generic_size , GFP_KERNEL ) ;
if ( ng )
ng - > len = INITIAL_NET_GEN_PTRS ;
return ng ;
2007-11-01 00:44:50 -07:00
}
2009-02-23 15:37:35 -08:00
# ifdef CONFIG_NET_NS
static struct kmem_cache * net_cachep ;
static struct workqueue_struct * netns_wq ;
2009-02-22 00:07:53 -08:00
static struct net * net_alloc ( void )
2007-11-07 01:30:30 -08:00
{
2009-02-22 00:07:53 -08:00
struct net * net = NULL ;
struct net_generic * ng ;
ng = net_alloc_generic ( ) ;
if ( ! ng )
goto out ;
net = kmem_cache_zalloc ( net_cachep , GFP_KERNEL ) ;
2007-11-07 01:30:30 -08:00
if ( ! net )
2009-02-22 00:07:53 -08:00
goto out_free ;
2007-11-07 01:30:30 -08:00
2009-02-22 00:07:53 -08:00
rcu_assign_pointer ( net - > gen , ng ) ;
out :
return net ;
out_free :
kfree ( ng ) ;
goto out ;
}
static void net_free ( struct net * net )
{
2008-04-16 01:58:04 -07:00
# ifdef NETNS_REFCNT_DEBUG
2007-11-07 01:30:30 -08:00
if ( unlikely ( atomic_read ( & net - > use_count ) ! = 0 ) ) {
printk ( KERN_EMERG " network namespace not free! Usage: %d \n " ,
atomic_read ( & net - > use_count ) ) ;
return ;
}
2008-04-16 01:58:04 -07:00
# endif
2008-10-14 22:54:48 -07:00
kfree ( net - > gen ) ;
2007-11-07 01:30:30 -08:00
kmem_cache_free ( net_cachep , net ) ;
}
2009-05-04 11:12:14 -07:00
static struct net * net_create ( void )
2007-09-26 22:04:26 -07:00
{
2009-05-04 11:12:14 -07:00
struct net * net ;
int rv ;
2007-09-26 22:04:26 -07:00
2009-05-04 11:12:14 -07:00
net = net_alloc ( ) ;
if ( ! net )
return ERR_PTR ( - ENOMEM ) ;
2007-09-26 22:04:26 -07:00
mutex_lock ( & net_mutex ) ;
2009-05-04 11:12:14 -07:00
rv = setup_net ( net ) ;
if ( rv = = 0 ) {
2009-02-22 00:07:53 -08:00
rtnl_lock ( ) ;
2009-07-10 09:51:33 +00:00
list_add_tail_rcu ( & net - > list , & net_namespace_list ) ;
2009-02-22 00:07:53 -08:00
rtnl_unlock ( ) ;
}
2007-09-26 22:04:26 -07:00
mutex_unlock ( & net_mutex ) ;
2009-05-04 11:12:14 -07:00
if ( rv < 0 ) {
net_free ( net ) ;
return ERR_PTR ( rv ) ;
}
return net ;
}
2009-02-22 00:07:53 -08:00
2009-05-04 11:12:14 -07:00
/*
 * Return a brand new namespace when CLONE_NEWNET is set in @flags,
 * otherwise @old_net with an extra reference taken via get_net().
 */
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	return (flags & CLONE_NEWNET) ? net_create() : get_net(old_net);
}
2009-11-29 22:25:27 +00:00
static DEFINE_SPINLOCK ( cleanup_list_lock ) ;
static LIST_HEAD ( cleanup_list ) ; /* Must hold cleanup_list_lock to touch */
2007-11-01 00:44:50 -07:00
static void cleanup_net ( struct work_struct * work )
{
2009-11-29 22:25:28 +00:00
const struct pernet_operations * ops ;
2009-11-29 22:25:27 +00:00
struct net * net , * tmp ;
LIST_HEAD ( net_kill_list ) ;
2009-12-03 02:29:03 +00:00
LIST_HEAD ( net_exit_list ) ;
2007-11-01 00:44:50 -07:00
2009-11-29 22:25:27 +00:00
/* Atomically snapshot the list of namespaces to cleanup */
spin_lock_irq ( & cleanup_list_lock ) ;
list_replace_init ( & cleanup_list , & net_kill_list ) ;
spin_unlock_irq ( & cleanup_list_lock ) ;
2007-11-01 00:44:50 -07:00
mutex_lock ( & net_mutex ) ;
/* Don't let anyone else find us. */
rtnl_lock ( ) ;
2009-12-03 02:29:03 +00:00
list_for_each_entry ( net , & net_kill_list , cleanup_list ) {
2009-11-29 22:25:27 +00:00
list_del_rcu ( & net - > list ) ;
2009-12-03 02:29:03 +00:00
list_add_tail ( & net - > exit_list , & net_exit_list ) ;
}
2007-11-01 00:44:50 -07:00
rtnl_unlock ( ) ;
2009-07-10 09:51:33 +00:00
/*
* Another CPU might be rcu - iterating the list , wait for it .
* This needs to be before calling the exit ( ) notifiers , so
* the rcu_barrier ( ) below isn ' t sufficient alone .
*/
synchronize_rcu ( ) ;
2007-11-01 00:44:50 -07:00
/* Run all of the network namespace exit methods */
2009-12-03 02:29:03 +00:00
list_for_each_entry_reverse ( ops , & pernet_list , list )
ops_exit_list ( ops , & net_exit_list ) ;
2009-11-29 22:25:28 +00:00
/* Free the net generic variables */
2009-12-03 02:29:03 +00:00
list_for_each_entry_reverse ( ops , & pernet_list , list )
ops_free_list ( ops , & net_exit_list ) ;
2007-11-01 00:44:50 -07:00
mutex_unlock ( & net_mutex ) ;
/* Ensure there are no outstanding rcu callbacks using this
* network namespace .
*/
rcu_barrier ( ) ;
/* Finally it is safe to free my network namespace structure */
2009-12-03 02:29:03 +00:00
list_for_each_entry_safe ( net , tmp , & net_exit_list , exit_list ) {
list_del_init ( & net - > exit_list ) ;
2009-11-29 22:25:27 +00:00
net_free ( net ) ;
}
2007-11-01 00:44:50 -07:00
}
2009-11-29 22:25:27 +00:00
static DECLARE_WORK ( net_cleanup_work , cleanup_net ) ;
2007-11-01 00:44:50 -07:00
void __put_net ( struct net * net )
{
/* Cleanup the network namespace in process context */
2009-11-29 22:25:27 +00:00
unsigned long flags ;
spin_lock_irqsave ( & cleanup_list_lock , flags ) ;
list_add ( & net - > cleanup_list , & cleanup_list ) ;
spin_unlock_irqrestore ( & cleanup_list_lock , flags ) ;
queue_work ( netns_wq , & net_cleanup_work ) ;
2007-11-01 00:44:50 -07:00
}
EXPORT_SYMBOL_GPL ( __put_net ) ;
# else
/*
 * Variant built without CONFIG_NET_NS: a request for a new namespace
 * (CLONE_NEWNET) is rejected with -EINVAL, anything else just gets
 * @old_net back.
 */
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (!(flags & CLONE_NEWNET))
		return old_net;

	return ERR_PTR(-EINVAL);
}
# endif
2009-07-10 09:51:35 +00:00
/*
 * Look up the network namespace of the task with virtual pid @pid.
 *
 * Returns the result of get_net() on that namespace, or
 * ERR_PTR(-ESRCH) when no such task exists or the task has no nsproxy.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct net *found = ERR_PTR(-ESRCH);
	struct task_struct *task;
	struct nsproxy *ns;

	rcu_read_lock();
	task = find_task_by_vpid(pid);
	ns = task ? task_nsproxy(task) : NULL;
	if (ns)
		found = get_net(ns->net_ns);
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL ( get_net_ns_by_pid ) ;
2007-09-12 11:50:50 +02:00
/*
 * Boot-time initialisation: with CONFIG_NET_NS create the struct net
 * slab cache and the cleanup workqueue, then give init_net its generic
 * pointer array, run setup_net() on it and add it to
 * net_namespace_list.  Failures of these steps panic.
 */
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create(" net_namespace ", sizeof(struct net),
				       SMP_CACHE_BYTES, SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue(" netns ");
	if (netns_wq == NULL)
		panic(" Could not create netns workq ");
#endif

	ng = net_alloc_generic();
	if (ng == NULL)
		panic(" Could not allocate generic netns ");
	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);

	if (setup_net(&init_net) != 0)
		panic(" Could not setup the initial network namespace ");

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	return 0;
}
pure_initcall ( net_ns_init ) ;
2007-11-13 03:23:21 -08:00
# ifdef CONFIG_NET_NS
2009-11-29 22:25:28 +00:00
static int __register_pernet_operations ( struct list_head * list ,
struct pernet_operations * ops )
2007-09-12 11:50:50 +02:00
{
2009-12-03 02:29:03 +00:00
struct net * net ;
2007-09-12 11:50:50 +02:00
int error ;
2009-12-03 02:29:03 +00:00
LIST_HEAD ( net_exit_list ) ;
2007-09-12 11:50:50 +02:00
list_add_tail ( & ops - > list , list ) ;
2009-11-29 22:25:28 +00:00
if ( ops - > init | | ( ops - > id & & ops - > size ) ) {
2007-11-01 00:42:43 -07:00
for_each_net ( net ) {
2009-11-29 22:25:28 +00:00
error = ops_init ( ops , net ) ;
2007-09-12 11:50:50 +02:00
if ( error )
goto out_undo ;
2009-12-03 02:29:03 +00:00
list_add_tail ( & net - > exit_list , & net_exit_list ) ;
2007-09-12 11:50:50 +02:00
}
}
2007-11-01 00:42:43 -07:00
return 0 ;
2007-09-12 11:50:50 +02:00
out_undo :
/* If I have an error cleanup all namespaces I initialized */
list_del ( & ops - > list ) ;
2009-12-03 02:29:03 +00:00
ops_exit_list ( ops , & net_exit_list ) ;
ops_free_list ( ops , & net_exit_list ) ;
2007-11-01 00:42:43 -07:00
return error ;
2007-09-12 11:50:50 +02:00
}
2009-11-29 22:25:28 +00:00
static void __unregister_pernet_operations ( struct pernet_operations * ops )
2007-09-12 11:50:50 +02:00
{
struct net * net ;
2009-12-03 02:29:03 +00:00
LIST_HEAD ( net_exit_list ) ;
2007-09-12 11:50:50 +02:00
list_del ( & ops - > list ) ;
2009-12-03 02:29:03 +00:00
for_each_net ( net )
list_add_tail ( & net - > exit_list , & net_exit_list ) ;
ops_exit_list ( ops , & net_exit_list ) ;
ops_free_list ( ops , & net_exit_list ) ;
2007-09-12 11:50:50 +02:00
}
2007-11-13 03:23:21 -08:00
# else
2009-11-29 22:25:28 +00:00
static int __register_pernet_operations ( struct list_head * list ,
struct pernet_operations * ops )
2007-11-13 03:23:21 -08:00
{
2009-11-29 22:25:28 +00:00
int err = 0 ;
err = ops_init ( ops , & init_net ) ;
if ( err )
ops_free ( ops , & init_net ) ;
return err ;
2007-11-13 03:23:21 -08:00
}
2009-11-29 22:25:28 +00:00
static void __unregister_pernet_operations ( struct pernet_operations * ops )
2007-11-13 03:23:21 -08:00
{
2009-12-03 02:29:03 +00:00
LIST_HEAD ( net_exit_list ) ;
list_add ( & init_net . exit_list , & net_exit_list ) ;
ops_exit_list ( ops , & net_exit_list ) ;
ops_free_list ( ops , & net_exit_list ) ;
2007-11-13 03:23:21 -08:00
}
2009-11-29 22:25:28 +00:00
# endif /* CONFIG_NET_NS */
2007-11-13 03:23:21 -08:00
2008-04-15 00:35:23 -07:00
static DEFINE_IDA ( net_generic_ids ) ;
2009-11-29 22:25:28 +00:00
static int register_pernet_operations ( struct list_head * list ,
struct pernet_operations * ops )
{
int error ;
if ( ops - > id ) {
again :
error = ida_get_new_above ( & net_generic_ids , 1 , ops - > id ) ;
if ( error < 0 ) {
if ( error = = - EAGAIN ) {
ida_pre_get ( & net_generic_ids , GFP_KERNEL ) ;
goto again ;
}
return error ;
}
}
error = __register_pernet_operations ( list , ops ) ;
2009-12-03 02:29:06 +00:00
if ( error ) {
rcu_barrier ( ) ;
if ( ops - > id )
ida_remove ( & net_generic_ids , * ops - > id ) ;
}
2009-11-29 22:25:28 +00:00
return error ;
}
static void unregister_pernet_operations ( struct pernet_operations * ops )
{
__unregister_pernet_operations ( ops ) ;
2009-12-03 02:29:06 +00:00
rcu_barrier ( ) ;
2009-11-29 22:25:28 +00:00
if ( ops - > id )
ida_remove ( & net_generic_ids , * ops - > id ) ;
}
2007-09-12 11:50:50 +02:00
/**
* register_pernet_subsys - register a network namespace subsystem
* @ ops : pernet operations structure for the subsystem
*
* Register a subsystem which has init and exit functions
* that are called when network namespaces are created and
* destroyed respectively .
*
* When registered, all network namespace init functions are
* called for every existing network namespace, allowing kernel
* modules to have a race-free view of the set of network namespaces.
*
* When a new network namespace is created all of the init
* methods are called in the order in which they were registered .
*
* When a network namespace is destroyed all of the exit methods
* are called in the reverse of the order with which they were
* registered .
*/
int register_pernet_subsys(struct pernet_operations *ops)
{
	int rc;

	/* Subsystems are linked in front of the first device entry. */
	mutex_lock(&net_mutex);
	rc = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);

	return rc;
}
EXPORT_SYMBOL_GPL ( register_pernet_subsys ) ;
/**
* unregister_pernet_subsys - unregister a network namespace subsystem
* @ ops : pernet operations structure to manipulate
*
* Remove the pernet operations structure from the list to be
2008-02-03 17:56:48 +02:00
* used when network namespaces are created or destroyed . In
2007-09-12 11:50:50 +02:00
* addition run the exit method for all existing network
* namespaces .
*/
2010-04-25 00:49:56 -07:00
void unregister_pernet_subsys ( struct pernet_operations * ops )
2007-09-12 11:50:50 +02:00
{
mutex_lock ( & net_mutex ) ;
2010-04-25 00:49:56 -07:00
unregister_pernet_operations ( ops ) ;
2007-09-12 11:50:50 +02:00
mutex_unlock ( & net_mutex ) ;
}
EXPORT_SYMBOL_GPL ( unregister_pernet_subsys ) ;
/**
* register_pernet_device - register a network namespace device
* @ ops : pernet operations structure for the subsystem
*
* Register a device which has init and exit functions
* that are called when network namespaces are created and
* destroyed respectively .
*
* When registered, all network namespace init functions are
* called for every existing network namespace, allowing kernel
* modules to have a race-free view of the set of network namespaces.
*
* When a new network namespace is created all of the init
* methods are called in the order in which they were registered .
*
* When a network namespace is destroyed all of the exit methods
* are called in the reverse of the order with which they were
* registered .
*/
int register_pernet_device ( struct pernet_operations * ops )
{
int error ;
mutex_lock ( & net_mutex ) ;
error = register_pernet_operations ( & pernet_list , ops ) ;
if ( ! error & & ( first_device = = & pernet_list ) )
first_device = & ops - > list ;
mutex_unlock ( & net_mutex ) ;
return error ;
}
EXPORT_SYMBOL_GPL ( register_pernet_device ) ;
/**
* unregister_pernet_device - unregister a network namespace netdevice
* @ ops : pernet operations structure to manipulate
*
* Remove the pernet operations structure from the list to be
2008-02-03 17:56:48 +02:00
* used when network namespaces are created or destroyed . In
2007-09-12 11:50:50 +02:00
* addition run the exit method for all existing network
* namespaces .
*/
void unregister_pernet_device ( struct pernet_operations * ops )
{
mutex_lock ( & net_mutex ) ;
if ( & ops - > list = = first_device )
first_device = first_device - > next ;
unregister_pernet_operations ( ops ) ;
mutex_unlock ( & net_mutex ) ;
}
EXPORT_SYMBOL_GPL ( unregister_pernet_device ) ;