2005-04-16 15:20:36 -07:00
/* flow.c: Generic flow cache.
 *
 * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
 * Copyright (C) 2003 David S. Miller (davem@redhat.com)
 */
# include <linux/kernel.h>
# include <linux/module.h>
# include <linux/list.h>
# include <linux/jhash.h>
# include <linux/interrupt.h>
# include <linux/mm.h>
# include <linux/random.h>
# include <linux/init.h>
# include <linux/slab.h>
# include <linux/smp.h>
# include <linux/completion.h>
# include <linux/percpu.h>
# include <linux/bitops.h>
# include <linux/notifier.h>
# include <linux/cpu.h>
# include <linux/cpumask.h>
2006-03-20 22:33:17 -08:00
# include <linux/mutex.h>
2005-04-16 15:20:36 -07:00
# include <net/flow.h>
# include <asm/atomic.h>
[LSM-IPSec]: Security association restriction.
This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets. Extensions to the SELinux LSM are
included that leverage the patch for this purpose.
This patch implements the changes necessary to the XFRM subsystem,
pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a
socket to use only authorized security associations (or no security
association) to send/receive network packets.
Patch purpose:
The patch is designed to enable access control per packets based on
the strongly authenticated IPSec security association. Such access
controls augment the existing ones based on network interface and IP
address. The former are very coarse-grained, and the latter can be
spoofed. By using IPSec, the system can control access to remote
hosts based on cryptographic keys generated using the IPSec mechanism.
This enables access control on a per-machine basis or per-application
if the remote machine is running the same mechanism and trusted to
enforce the access control policy.
Patch design approach:
The overall approach is that policy (xfrm_policy) entries set by
user-level programs (e.g., setkey for ipsec-tools) are extended with a
security context that is used at policy selection time in the XFRM
subsystem to restrict the sockets that can send/receive packets via
security associations (xfrm_states) that are built from those
policies.
A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.
Patch implementation details:
On output, the policy retrieved (via xfrm_policy_lookup or
xfrm_sk_policy_lookup) must be authorized for the security context of
the socket and the same security context is required for resultant
security association (retrieved or negotiated via racoon in
ipsec-tools). This is enforced in xfrm_state_find.
On input, the policy retrieved must also be authorized for the socket
(at __xfrm_policy_check), and the security context of the policy must
also match the security association being used.
The patch has virtually no impact on packets that do not use IPSec.
The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as
before.
Also, if IPSec is used without security contexts, the impact is
minimal. The LSM must allow such policies to be selected for the
combination of socket and remote machine, but subsequent IPSec
processing proceeds as in the original case.
Testing:
The pfkey interface is tested using the ipsec-tools. ipsec-tools have
been modified (a separate ipsec-tools patch is available for version
0.5) that supports assignment of xfrm_policy entries and security
associations with security contexts via setkey and the negotiation
using the security contexts via racoon.
The xfrm_user interface is tested via ad hoc programs that set
security contexts. These programs are also available from me, and
contain programs for setting, getting, and deleting policy for testing
this interface. Testing of sa functions was done by tracing kernel
behavior.
Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-12-13 23:12:27 -08:00
# include <linux/security.h>
2005-04-16 15:20:36 -07:00
/*
 * One cached lookup result. Entries are singly linked through 'next' into
 * the buckets of a per-cpu hash table (flow_tables).
 */
struct flow_cache_entry {
/* Next entry in the same hash bucket, or NULL at the end of the chain. */
struct flow_cache_entry * next ;
/* family and dir are matched against the lookup arguments of the same name. */
u16 family ;
u8 dir ;
/* Value of flow_cache_genid when 'object' was last resolved; a lookup
 * only trusts the entry while this still equals the global value. */
u32 genid ;
2008-02-07 23:30:42 -08:00
/* Copy of the flow key this entry was created for. */
struct flowi key ;
2005-04-16 15:20:36 -07:00
/* Resolver result handed back to callers; may be NULL. */
void * object ;
/* Counter incremented/decremented on behalf of 'object'; it is only
 * dereferenced while 'object' is non-NULL. */
atomic_t * object_ref ;
} ;
/* Global generation number. A cached entry whose genid differs from this
 * value is re-resolved on lookup and dropped by the flush tasklet. */
atomic_t flow_cache_genid = ATOMIC_INIT ( 0 ) ;
/* log2 of the number of buckets in each per-cpu table; set in flow_cache_init(). */
static u32 flow_hash_shift ;
# define flow_hash_size (1 << flow_hash_shift)
/* Per-cpu hash table: an array of flow_hash_size bucket heads, NULL until
 * flow_cache_cpu_prepare() has allocated it. */
static DEFINE_PER_CPU ( struct flow_cache_entry * * , flow_tables ) = { NULL } ;
# define flow_table(cpu) (per_cpu(flow_tables, cpu))
2006-12-06 20:33:20 -08:00
/* Slab cache that flow_cache_entry objects are allocated from and freed to. */
static struct kmem_cache * flow_cachep __read_mostly ;
2005-04-16 15:20:36 -07:00
/* Water marks on the per-cpu entry count: a lookup that finds the count
 * above flow_hwm shrinks every bucket to flow_lwm / flow_hash_size entries. */
static int flow_lwm , flow_hwm ;
/* Per-cpu bookkeeping for the flow cache hash table. */
struct flow_percpu_info {
/* Non-zero asks the next lookup on this cpu to pick a new hash seed. */
int hash_rnd_recalc ;
/* Seed passed to jhash2() when hashing flow keys on this cpu. */
u32 hash_rnd ;
/* Number of entries currently held in this cpu's table. */
int count ;
2008-02-07 18:03:18 -08:00
} ;
2005-04-16 15:20:36 -07:00
/* One flow_percpu_info per cpu, zero-initialised. */
static DEFINE_PER_CPU ( struct flow_percpu_info , flow_hash_info ) = { 0 } ;
/* Lvalue accessors for the individual fields of a given cpu's flow_hash_info. */
# define flow_hash_rnd_recalc(cpu) \
( per_cpu ( flow_hash_info , cpu ) . hash_rnd_recalc )
# define flow_hash_rnd(cpu) \
( per_cpu ( flow_hash_info , cpu ) . hash_rnd )
# define flow_count(cpu) \
( per_cpu ( flow_hash_info , cpu ) . count )
/* Timer whose callback (flow_cache_new_hashrnd) requests new hash seeds. */
static struct timer_list flow_hash_rnd_timer ;
/* Re-seed interval in jiffies: 10 * 60 * HZ, i.e. ten minutes with HZ ticks per second. */
# define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
/* State shared between flow_cache_flush() and the per-cpu flush tasklets. */
struct flow_flush_info {
/* Number of cpus whose flush tasklet has not finished yet. */
atomic_t cpuleft ;
/* Completed by the tasklet that brings cpuleft down to zero. */
struct completion completion ;
} ;
/* One flush tasklet per cpu; initialised in flow_cache_cpu_prepare() to run
 * flow_cache_flush_tasklet(). */
static DEFINE_PER_CPU ( struct tasklet_struct , flow_flush_tasklets ) = { NULL } ;
/* Pointer to the given cpu's flush tasklet. */
# define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
static void flow_cache_new_hashrnd ( unsigned long arg )
{
int i ;
2006-04-10 22:52:50 -07:00
for_each_possible_cpu ( i )
2005-04-16 15:20:36 -07:00
flow_hash_rnd_recalc ( i ) = 1 ;
flow_hash_rnd_timer . expires = jiffies + FLOW_HASH_RND_PERIOD ;
add_timer ( & flow_hash_rnd_timer ) ;
}
IPsec: propagate security module errors up from flow_cache_lookup
When a security module is loaded (in this case, SELinux), the
security_xfrm_policy_lookup() hook can return an access denied permission
(or other error). We were not handling that correctly, and in fact
inverting the return logic and propagating a false "ok" back up to
xfrm_lookup(), which then allowed packets to pass as if they were not
associated with an xfrm policy.
The way I was seeing the problem was when connecting via IPsec to a
confined service on an SELinux box (vsftpd), which did not have the
appropriate SELinux policy permissions to send packets via IPsec.
The first SYNACK would be blocked, because of an uncached lookup via
flow_cache_lookup(), which would fail to resolve an xfrm policy because
the SELinux policy is checked at that point via the resolver.
However, retransmitted SYNACKs would then find a cached flow entry when
calling into flow_cache_lookup() with a null xfrm policy, which is
interpreted by xfrm_lookup() as the packet not having any associated
policy and similarly to the first case, allowing it to pass without
transformation.
The solution presented here is to first ensure that errno values are
correctly propagated all the way back up through the various call chains
from security_xfrm_policy_lookup(), and handled correctly.
Then, flow_cache_lookup() is modified, so that if the policy resolver
fails (typically a permission denied via the security module), the flow
cache entry is killed rather than having a null policy assigned (which
indicates that the packet can pass freely). This also forces any future
lookups for the same flow to consult the security module (e.g. SELinux)
for current security policy (rather than, say, caching the error on the
flow cache entry).
Signed-off-by: James Morris <jmorris@namei.org>
2006-10-05 15:42:27 -05:00
static void flow_entry_kill ( int cpu , struct flow_cache_entry * fle )
{
if ( fle - > object )
atomic_dec ( fle - > object_ref ) ;
kmem_cache_free ( flow_cachep , fle ) ;
flow_count ( cpu ) - - ;
}
2005-04-16 15:20:36 -07:00
static void __flow_cache_shrink ( int cpu , int shrink_to )
{
struct flow_cache_entry * fle , * * flp ;
int i ;
for ( i = 0 ; i < flow_hash_size ; i + + ) {
int k = 0 ;
flp = & flow_table ( cpu ) [ i ] ;
while ( ( fle = * flp ) ! = NULL & & k < shrink_to ) {
k + + ;
flp = & fle - > next ;
}
while ( ( fle = * flp ) ! = NULL ) {
* flp = fle - > next ;
IPsec: propagate security module errors up from flow_cache_lookup
When a security module is loaded (in this case, SELinux), the
security_xfrm_policy_lookup() hook can return an access denied permission
(or other error). We were not handling that correctly, and in fact
inverting the return logic and propagating a false "ok" back up to
xfrm_lookup(), which then allowed packets to pass as if they were not
associated with an xfrm policy.
The way I was seeing the problem was when connecting via IPsec to a
confined service on an SELinux box (vsftpd), which did not have the
appropriate SELinux policy permissions to send packets via IPsec.
The first SYNACK would be blocked, because of an uncached lookup via
flow_cache_lookup(), which would fail to resolve an xfrm policy because
the SELinux policy is checked at that point via the resolver.
However, retransmitted SYNACKs would then find a cached flow entry when
calling into flow_cache_lookup() with a null xfrm policy, which is
interpreted by xfrm_lookup() as the packet not having any associated
policy and similarly to the first case, allowing it to pass without
transformation.
The solution presented here is to first ensure that errno values are
correctly propagated all the way back up through the various call chains
from security_xfrm_policy_lookup(), and handled correctly.
Then, flow_cache_lookup() is modified, so that if the policy resolver
fails (typically a permission denied via the security module), the flow
cache entry is killed rather than having a null policy assigned (which
indicates that the packet can pass freely). This also forces any future
lookups for the same flow to consult the security module (e.g. SELinux)
for current security policy (rather than, say, caching the error on the
flow cache entry).
Signed-off-by: James Morris <jmorris@namei.org>
2006-10-05 15:42:27 -05:00
flow_entry_kill ( cpu , fle ) ;
2005-04-16 15:20:36 -07:00
}
}
}
/*
 * Shrink @cpu's table towards the low water mark: each bucket keeps at most
 * flow_lwm / flow_hash_size entries.
 */
static void flow_cache_shrink(int cpu)
{
	__flow_cache_shrink(cpu, flow_lwm / flow_hash_size);
}
static void flow_new_hash_rnd ( int cpu )
{
get_random_bytes ( & flow_hash_rnd ( cpu ) , sizeof ( u32 ) ) ;
flow_hash_rnd_recalc ( cpu ) = 0 ;
__flow_cache_shrink ( cpu , 0 ) ;
}
static u32 flow_hash_code ( struct flowi * key , int cpu )
{
u32 * k = ( u32 * ) key ;
return ( jhash2 ( k , ( sizeof ( * key ) / sizeof ( u32 ) ) , flow_hash_rnd ( cpu ) ) &
( flow_hash_size - 1 ) ) ;
}
/* Word type used by flow_key_compare(): u64 when BITS_PER_LONG is 64,
 * u32 otherwise. */
# if (BITS_PER_LONG == 64)
typedef u64 flow_compare_t ;
# else
typedef u32 flow_compare_t ;
# endif
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here , such as alignment and
* constant size .
*/
static int flow_key_compare ( struct flowi * key1 , struct flowi * key2 )
{
flow_compare_t * k1 , * k1_lim , * k2 ;
const int n_elem = sizeof ( struct flowi ) / sizeof ( flow_compare_t ) ;
2007-10-23 21:15:21 -07:00
BUILD_BUG_ON ( sizeof ( struct flowi ) % sizeof ( flow_compare_t ) ) ;
2005-04-16 15:20:36 -07:00
k1 = ( flow_compare_t * ) key1 ;
k1_lim = k1 + n_elem ;
k2 = ( flow_compare_t * ) key2 ;
do {
if ( * k1 + + ! = * k2 + + )
return 1 ;
} while ( k1 < k1_lim ) ;
return 0 ;
}
2008-11-25 17:35:18 -08:00
void * flow_cache_lookup ( struct net * net , struct flowi * key , u16 family , u8 dir ,
2005-04-16 15:20:36 -07:00
flow_resolve_t resolver )
{
struct flow_cache_entry * fle , * * head ;
unsigned int hash ;
int cpu ;
local_bh_disable ( ) ;
cpu = smp_processor_id ( ) ;
fle = NULL ;
/* Packet really early in init? Making flow_cache_init a
* pre - smp initcall would solve this . - - RR */
if ( ! flow_table ( cpu ) )
goto nocache ;
if ( flow_hash_rnd_recalc ( cpu ) )
flow_new_hash_rnd ( cpu ) ;
hash = flow_hash_code ( key , cpu ) ;
head = & flow_table ( cpu ) [ hash ] ;
for ( fle = * head ; fle ; fle = fle - > next ) {
if ( fle - > family = = family & &
fle - > dir = = dir & &
flow_key_compare ( key , & fle - > key ) = = 0 ) {
if ( fle - > genid = = atomic_read ( & flow_cache_genid ) ) {
void * ret = fle - > object ;
if ( ret )
atomic_inc ( fle - > object_ref ) ;
local_bh_enable ( ) ;
return ret ;
}
break ;
}
}
if ( ! fle ) {
if ( flow_count ( cpu ) > flow_hwm )
flow_cache_shrink ( cpu ) ;
2006-12-06 20:33:16 -08:00
fle = kmem_cache_alloc ( flow_cachep , GFP_ATOMIC ) ;
2005-04-16 15:20:36 -07:00
if ( fle ) {
fle - > next = * head ;
* head = fle ;
fle - > family = family ;
fle - > dir = dir ;
memcpy ( & fle - > key , key , sizeof ( * key ) ) ;
fle - > object = NULL ;
flow_count ( cpu ) + + ;
}
}
nocache :
{
IPsec: propagate security module errors up from flow_cache_lookup
When a security module is loaded (in this case, SELinux), the
security_xfrm_policy_lookup() hook can return an access denied permission
(or other error). We were not handling that correctly, and in fact
inverting the return logic and propagating a false "ok" back up to
xfrm_lookup(), which then allowed packets to pass as if they were not
associated with an xfrm policy.
The way I was seeing the problem was when connecting via IPsec to a
confined service on an SELinux box (vsftpd), which did not have the
appropriate SELinux policy permissions to send packets via IPsec.
The first SYNACK would be blocked, because of an uncached lookup via
flow_cache_lookup(), which would fail to resolve an xfrm policy because
the SELinux policy is checked at that point via the resolver.
However, retransmitted SYNACKs would then find a cached flow entry when
calling into flow_cache_lookup() with a null xfrm policy, which is
interpreted by xfrm_lookup() as the packet not having any associated
policy and similarly to the first case, allowing it to pass without
transformation.
The solution presented here is to first ensure that errno values are
correctly propagated all the way back up through the various call chains
from security_xfrm_policy_lookup(), and handled correctly.
Then, flow_cache_lookup() is modified, so that if the policy resolver
fails (typically a permission denied via the security module), the flow
cache entry is killed rather than having a null policy assigned (which
indicates that the packet can pass freely). This also forces any future
lookups for the same flow to consult the security module (e.g. SELinux)
for current security policy (rather than, say, caching the error on the
flow cache entry).
Signed-off-by: James Morris <jmorris@namei.org>
2006-10-05 15:42:27 -05:00
int err ;
2005-04-16 15:20:36 -07:00
void * obj ;
atomic_t * obj_ref ;
2008-11-25 17:35:18 -08:00
err = resolver ( net , key , family , dir , & obj , & obj_ref ) ;
2005-04-16 15:20:36 -07:00
2007-01-10 22:06:32 -08:00
if ( fle & & ! err ) {
fle - > genid = atomic_read ( & flow_cache_genid ) ;
if ( fle - > object )
atomic_dec ( fle - > object_ref ) ;
fle - > object = obj ;
fle - > object_ref = obj_ref ;
if ( obj )
atomic_inc ( fle - > object_ref ) ;
2005-04-16 15:20:36 -07:00
}
local_bh_enable ( ) ;
IPsec: propagate security module errors up from flow_cache_lookup
When a security module is loaded (in this case, SELinux), the
security_xfrm_policy_lookup() hook can return an access denied permission
(or other error). We were not handling that correctly, and in fact
inverting the return logic and propagating a false "ok" back up to
xfrm_lookup(), which then allowed packets to pass as if they were not
associated with an xfrm policy.
The way I was seeing the problem was when connecting via IPsec to a
confined service on an SELinux box (vsftpd), which did not have the
appropriate SELinux policy permissions to send packets via IPsec.
The first SYNACK would be blocked, because of an uncached lookup via
flow_cache_lookup(), which would fail to resolve an xfrm policy because
the SELinux policy is checked at that point via the resolver.
However, retransmitted SYNACKs would then find a cached flow entry when
calling into flow_cache_lookup() with a null xfrm policy, which is
interpreted by xfrm_lookup() as the packet not having any associated
policy and similarly to the first case, allowing it to pass without
transformation.
The solution presented here is to first ensure that errno values are
correctly propagated all the way back up through the various call chains
from security_xfrm_policy_lookup(), and handled correctly.
Then, flow_cache_lookup() is modified, so that if the policy resolver
fails (typically a permission denied via the security module), the flow
cache entry is killed rather than having a null policy assigned (which
indicates that the packet can pass freely). This also forces any future
lookups for the same flow to consult the security module (e.g. SELinux)
for current security policy (rather than, say, caching the error on the
flow cache entry).
Signed-off-by: James Morris <jmorris@namei.org>
2006-10-05 15:42:27 -05:00
if ( err )
obj = ERR_PTR ( err ) ;
2005-04-16 15:20:36 -07:00
return obj ;
}
}
static void flow_cache_flush_tasklet ( unsigned long data )
{
struct flow_flush_info * info = ( void * ) data ;
int i ;
int cpu ;
cpu = smp_processor_id ( ) ;
for ( i = 0 ; i < flow_hash_size ; i + + ) {
struct flow_cache_entry * fle ;
fle = flow_table ( cpu ) [ i ] ;
for ( ; fle ; fle = fle - > next ) {
unsigned genid = atomic_read ( & flow_cache_genid ) ;
if ( ! fle - > object | | fle - > genid = = genid )
continue ;
fle - > object = NULL ;
atomic_dec ( fle - > object_ref ) ;
}
}
if ( atomic_dec_and_test ( & info - > cpuleft ) )
complete ( & info - > completion ) ;
}
static void flow_cache_flush_per_cpu ( void * ) __attribute__ ( ( __unused__ ) ) ;
static void flow_cache_flush_per_cpu ( void * data )
{
struct flow_flush_info * info = data ;
int cpu ;
struct tasklet_struct * tasklet ;
cpu = smp_processor_id ( ) ;
tasklet = flow_flush_tasklet ( cpu ) ;
tasklet - > data = ( unsigned long ) info ;
tasklet_schedule ( tasklet ) ;
}
/*
 * Detach stale objects from the flow cache of every online cpu and wait
 * until all cpus have done so.
 *
 * The work on each cpu is done by flow_cache_flush_tasklet(); this function
 * only coordinates it through an on-stack struct flow_flush_info.
 */
void flow_cache_flush ( void )
{
struct flow_flush_info info ;
2006-03-20 22:33:17 -08:00
/* Held for the whole flush, so only one flush runs at a time. */
static DEFINE_MUTEX ( flow_flush_sem ) ;
2005-04-16 15:20:36 -07:00
/* Don't want cpus going down or up during this. */
2008-01-25 21:08:02 +01:00
get_online_cpus ( ) ;
2006-03-20 22:33:17 -08:00
mutex_lock ( & flow_flush_sem ) ;
2005-04-16 15:20:36 -07:00
/* Every online cpu, this one included, decrements cpuleft once. */
atomic_set ( & info . cpuleft , num_online_cpus ( ) ) ;
init_completion ( & info . completion ) ;
local_bh_disable ( ) ;
2008-06-06 11:18:06 +02:00
/* Ask the other cpus to schedule their flush tasklet.
 * NOTE(review): the final 0 is presumably the "wait" flag (do not wait
 * for the remote calls to finish) -- confirm against this tree's
 * smp_call_function() prototype. */
smp_call_function ( flow_cache_flush_per_cpu , & info , 0 ) ;
2005-04-16 15:20:36 -07:00
/* The local cpu's table is flushed by calling the tasklet body directly. */
flow_cache_flush_tasklet ( ( unsigned long ) & info ) ;
local_bh_enable ( ) ;
/* info lives on this stack frame, so it must not go away before the last
 * tasklet has signalled the completion. */
wait_for_completion ( & info . completion ) ;
2006-03-20 22:33:17 -08:00
mutex_unlock ( & flow_flush_sem ) ;
2008-01-25 21:08:02 +01:00
put_online_cpus ( ) ;
2005-04-16 15:20:36 -07:00
}
2008-11-06 23:06:44 -08:00
/*
 * Set up the flow cache state of one cpu at init time.
 *
 * Allocates a zeroed hash table large enough for flow_hash_size bucket
 * heads (panicking when the pages cannot be obtained), requests an initial
 * hash seed, resets the entry count and initialises the cpu's flush
 * tasklet.
 */
static void __init flow_cache_cpu_prepare(int cpu)
{
	unsigned long table_bytes =
		sizeof(struct flow_cache_entry *) * flow_hash_size;
	unsigned long order = 0;

	/* Smallest page order whose size covers the bucket array. */
	while ((PAGE_SIZE << order) < table_bytes)
		order++;

	flow_table(cpu) = (struct flow_cache_entry **)
		__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
	if (!flow_table(cpu))
		panic(" NET: failed to allocate flow cache order %lu \n ", order);

	flow_hash_rnd_recalc(cpu) = 1;
	flow_count(cpu) = 0;

	tasklet_init(flow_flush_tasklet(cpu), flow_cache_flush_tasklet, 0);
}
static int flow_cache_cpu ( struct notifier_block * nfb ,
unsigned long action ,
void * hcpu )
{
2007-05-09 02:35:10 -07:00
if ( action = = CPU_DEAD | | action = = CPU_DEAD_FROZEN )
2005-04-16 15:20:36 -07:00
__flow_cache_shrink ( ( unsigned long ) hcpu , 0 ) ;
return NOTIFY_OK ;
}
/*
 * Module init: create the entry slab cache, size the hash tables, start the
 * periodic re-seed timer, prepare the per-cpu state of every possible cpu
 * and register the cpu notifier. Always returns 0.
 */
static int __init flow_cache_init ( void )
{
int i ;
/* NOTE(review): the result is not checked; SLAB_PANIC presumably makes a
 * failed creation panic instead of returning NULL -- confirm. */
flow_cachep = kmem_cache_create ( " flow_cache " ,
sizeof ( struct flow_cache_entry ) ,
2008-02-07 23:30:42 -08:00
0 , SLAB_PANIC ,
2007-07-20 10:11:58 +09:00
NULL ) ;
2005-04-16 15:20:36 -07:00
/* 1 << 10 = 1024 buckets per cpu. This must be set before the water
 * marks, which are derived from flow_hash_size: 2048 and 4096 entries. */
flow_hash_shift = 10 ;
flow_lwm = 2 * flow_hash_size ;
flow_hwm = 4 * flow_hash_size ;
2008-01-23 21:20:07 -08:00
/* First re-seed request fires one FLOW_HASH_RND_PERIOD from now; the
 * callback re-arms the timer itself. */
setup_timer ( & flow_hash_rnd_timer , flow_cache_new_hashrnd , 0 ) ;
2005-04-16 15:20:36 -07:00
flow_hash_rnd_timer . expires = jiffies + FLOW_HASH_RND_PERIOD ;
add_timer ( & flow_hash_rnd_timer ) ;
2006-04-10 22:52:50 -07:00
for_each_possible_cpu ( i )
2005-04-16 15:20:36 -07:00
flow_cache_cpu_prepare ( i ) ;
/* flow_cache_cpu() empties the table of a cpu that has gone down. */
hotcpu_notifier ( flow_cache_cpu , 0 ) ;
return 0 ;
}
/* Run flow_cache_init() at init time and export the generation counter and
 * the lookup entry point to other modules. */
module_init ( flow_cache_init ) ;
EXPORT_SYMBOL ( flow_cache_genid ) ;
EXPORT_SYMBOL ( flow_cache_lookup ) ;