/* License: GPL */

#include <linux/mutex.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;

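/*
 * Lazily assign the socket a unique 64-bit cookie, drawn from a
 * per-namespace counter on first use. The cmpxchg() settles races:
 * if another CPU installs a cookie first, the loop rereads and
 * returns the winner's value.
 */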
static u64 sock_gen_cookie(struct sock *sk)
{
	while (1) {
		u64 res = atomic64_read(&sk->sk_cookie);

		if (res)
			return res;
		res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
	}
}

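/*
 * Compare a user-supplied cookie with the socket's own. A pair of
 * INET_DIAG_NOCOOKIE values acts as a wildcard and matches any socket.
 */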
int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
{
	u64 res;

	if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
		return 0;

	res = sock_gen_cookie(sk);
	if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
		return -ESTALE;

	return 0;
}
EXPORT_SYMBOL_GPL(sock_diag_check_cookie);

void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
{
	u64 res = sock_gen_cookie(sk);

	cookie[0] = (u32)res;
	cookie[1] = (u32)(res >> 32);
}
EXPORT_SYMBOL_GPL(sock_diag_save_cookie);

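/*
 * Emit a netlink attribute holding a snapshot of the socket's memory
 * accounting (receive/send allocations, buffer limits, forward-alloc,
 * option memory and backlog length), indexed by the SK_MEMINFO_* enum.
 */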
int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
	u32 mem[SK_MEMINFO_VARS];

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;

	return nla_put(skb, attrtype, sizeof(mem), &mem);
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);

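/*
 * Report the socket's attached classic BPF filter as a netlink
 * attribute. Callers lacking the needed privileges get a zero-length
 * attribute instead of the program; the original (untranslated) filter
 * is read from filter->prog->orig_prog under rcu_read_lock().
 */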
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
			     struct sk_buff *skb, int attrtype)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	struct nlattr *attr;
	unsigned int flen;
	int err = 0;

	if (!may_report_filterinfo) {
		nla_reserve(skb, attrtype, 0);
		return 0;
	}

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (!filter)
		goto out;

	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	flen = bpf_classic_proglen(fprog);

	attr = nla_reserve(skb, attrtype, flen);
	if (attr == NULL) {
		err = -EMSGSIZE;
		goto out;
	}

	memcpy(nla_data(attr), fprog->filter, flen);
out:
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(sock_diag_put_filterinfo);

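/*
 * Socket-destruction events are reported to the SKNLGRP_* multicast
 * groups. The actual message construction is deferred to a workqueue
 * because the destruction path may run in atomic context, where
 * allocating and multicasting a netlink skb is not permitted.
 */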
struct broadcast_sk {
	struct sock *sk;
	struct work_struct work;
};

static size_t sock_diag_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
	       + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
	       + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}

static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
	struct broadcast_sk *bsk =
		container_of(work, struct broadcast_sk, work);
	struct sock *sk = bsk->sk;
	const struct sock_diag_handler *hndl;
	struct sk_buff *skb;
	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
	int err = -1;

	WARN_ON(group == SKNLGRP_NONE);

	skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out;

	mutex_lock(&sock_diag_table_mutex);
	hndl = sock_diag_handlers[sk->sk_family];
	if (hndl && hndl->get_info)
		err = hndl->get_info(skb, sk);
	mutex_unlock(&sock_diag_table_mutex);

	if (!err)
		nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
				GFP_KERNEL);
	else
		kfree_skb(skb);
out:
	sk_destruct(sk);
	kfree(bsk);
}

void sock_diag_broadcast_destroy(struct sock *sk)
{
	/* Note, this function is often called from an interrupt context. */
	struct broadcast_sk *bsk =
		kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);

	if (!bsk)
		return sk_destruct(sk);
	bsk->sk = sk;
	INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
	queue_work(broadcast_wq, &bsk->work);
}

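/*
 * Hook for the legacy TCPDIAG_GETSOCK/DCCPDIAG_GETSOCK interface; the
 * inet_diag module installs its compat handler here at load time.
 */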
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
	mutex_lock(&sock_diag_table_mutex);
	inet_rcv_compat = fn;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);

void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
	mutex_lock(&sock_diag_table_mutex);
	inet_rcv_compat = NULL;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);

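/*
 * At most one sock_diag handler may be registered per address family;
 * lookups take sock_diag_table_mutex. A family module registers itself
 * roughly like this (sketch; my_diag_handler and my_diag_dump are
 * made-up names):
 *
 *	static const struct sock_diag_handler my_diag_handler = {
 *		.family	= AF_INET,
 *		.dump	= my_diag_dump,
 *	};
 *
 *	err = sock_diag_register(&my_diag_handler);
 */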
int sock_diag_register(const struct sock_diag_handler *hndl)
{
	int err = 0;

	if (hndl->family >= AF_MAX)
		return -EINVAL;

	mutex_lock(&sock_diag_table_mutex);
	if (sock_diag_handlers[hndl->family])
		err = -EBUSY;
	else
		sock_diag_handlers[hndl->family] = hndl;
	mutex_unlock(&sock_diag_table_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(sock_diag_register);

void sock_diag_unregister(const struct sock_diag_handler *hnld)
{
	int family = hnld->family;

	if (family >= AF_MAX)
		return;

	mutex_lock(&sock_diag_table_mutex);
	BUG_ON(sock_diag_handlers[family] != hnld);
	sock_diag_handlers[family] = NULL;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);

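/*
 * Dispatch a SOCK_DIAG_BY_FAMILY request to the matching family
 * handler, first trying to autoload the handler's module through its
 * "net-pf-%d-proto-%d-type-%d" alias.
 */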
static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int err;
	struct sock_diag_req *req = nlmsg_data(nlh);
	const struct sock_diag_handler *hndl;

	if (nlmsg_len(nlh) < sizeof(*req))
		return -EINVAL;

	if (req->sdiag_family >= AF_MAX)
		return -EINVAL;

	if (sock_diag_handlers[req->sdiag_family] == NULL)
		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				NETLINK_SOCK_DIAG, req->sdiag_family);

	mutex_lock(&sock_diag_table_mutex);
	hndl = sock_diag_handlers[req->sdiag_family];
	if (hndl == NULL)
		err = -ENOENT;
	else
		err = hndl->dump(skb, nlh);
	mutex_unlock(&sock_diag_table_mutex);

	return err;
}

static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int ret;

	switch (nlh->nlmsg_type) {
	case TCPDIAG_GETSOCK:
	case DCCPDIAG_GETSOCK:
		if (inet_rcv_compat == NULL)
			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
					NETLINK_SOCK_DIAG, AF_INET);

		mutex_lock(&sock_diag_table_mutex);
		if (inet_rcv_compat != NULL)
			ret = inet_rcv_compat(skb, nlh);
		else
			ret = -EOPNOTSUPP;
		mutex_unlock(&sock_diag_table_mutex);

		return ret;
	case SOCK_DIAG_BY_FAMILY:
		return __sock_diag_rcv_msg(skb, nlh);
	default:
		return -EINVAL;
	}
}

static DEFINE_MUTEX(sock_diag_mutex);

static void sock_diag_rcv(struct sk_buff *skb)
{
	mutex_lock(&sock_diag_mutex);
	netlink_rcv_skb(skb, &sock_diag_rcv_msg);
	mutex_unlock(&sock_diag_mutex);
}

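/*
 * When userspace binds to one of the destroy-notification groups,
 * try to load the matching family handler module so that events are
 * actually generated.
 */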
static int sock_diag_bind(struct net *net, int group)
{
	switch (group) {
	case SKNLGRP_INET_TCP_DESTROY:
	case SKNLGRP_INET_UDP_DESTROY:
		if (!sock_diag_handlers[AF_INET])
			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				       NETLINK_SOCK_DIAG, AF_INET);
		break;
	case SKNLGRP_INET6_TCP_DESTROY:
	case SKNLGRP_INET6_UDP_DESTROY:
		if (!sock_diag_handlers[AF_INET6])
			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				       NETLINK_SOCK_DIAG, AF_INET6);
		break;
	}
	return 0;
}

static int __net_init diag_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= SKNLGRP_MAX,
		.input	= sock_diag_rcv,
		.bind	= sock_diag_bind,
		.flags	= NL_CFG_F_NONROOT_RECV,
	};

	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
	return net->diag_nlsk == NULL ? -ENOMEM : 0;
}

static void __net_exit diag_net_exit(struct net *net)
{
	netlink_kernel_release(net->diag_nlsk);
	net->diag_nlsk = NULL;
}

static struct pernet_operations diag_net_ops = {
	.init = diag_net_init,
	.exit = diag_net_exit,
};

static int __init sock_diag_init(void)
{
	broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
	BUG_ON(!broadcast_wq);

	return register_pernet_subsys(&diag_net_ops);
}
device_initcall(sock_diag_init);