/* License: GPL */

#include <linux/mutex.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/nospec.h>
#include <linux/cookie.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;

DEFINE_COOKIE(sock_cookie);
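
/*
 * Lazily assign a unique 64-bit cookie to the socket. Concurrent callers may
 * each generate a candidate, but atomic64_cmpxchg() lets only the first one
 * stick; the loop then re-reads and returns the winning value.
 */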
u64 __sock_gen_cookie(struct sock *sk)
{
	while (1) {
		u64 res = atomic64_read(&sk->sk_cookie);

		if (res)
			return res;

		res = gen_cookie_next(&sock_cookie);
		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
	}
}
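
/*
 * Compare the cookie supplied by user space (two __u32 halves) with the
 * socket's cookie. A request carrying INET_DIAG_NOCOOKIE in both halves
 * skips the check; a mismatch returns -ESTALE.
 */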
int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie)
{
	u64 res;

	if (cookie[0] == INET_DIAG_NOCOOKIE && cookie[1] == INET_DIAG_NOCOOKIE)
		return 0;

	res = sock_gen_cookie(sk);
	if ((u32)res != cookie[0] || (u32)(res >> 32) != cookie[1])
		return -ESTALE;

	return 0;
}
EXPORT_SYMBOL_GPL(sock_diag_check_cookie);

void sock_diag_save_cookie(struct sock *sk, __u32 *cookie)
{
	u64 res = sock_gen_cookie(sk);

	cookie[0] = (u32)res;
	cookie[1] = (u32)(res >> 32);
}
EXPORT_SYMBOL_GPL(sock_diag_save_cookie);

int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
{
	u32 mem[SK_MEMINFO_VARS];

	sk_get_meminfo(sk, mem);

	return nla_put(skb, attrtype, sizeof(mem), &mem);
}
EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
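
/*
 * Dump the classic BPF filter attached to the socket (if any) as a netlink
 * attribute. Callers that may not report filter contents get a zero-length
 * attribute reserved instead.
 */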
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
			     struct sk_buff *skb, int attrtype)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	struct nlattr *attr;
	unsigned int flen;
	int err = 0;

	if (!may_report_filterinfo) {
		nla_reserve(skb, attrtype, 0);
		return 0;
	}

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (!filter)
		goto out;

	fprog = filter->prog->orig_prog;
	if (!fprog)
		goto out;

	flen = bpf_classic_proglen(fprog);

	attr = nla_reserve(skb, attrtype, flen);
	if (attr == NULL) {
		err = -EMSGSIZE;
		goto out;
	}

	memcpy(nla_data(attr), fprog->filter, flen);
out:
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(sock_diag_put_filterinfo);
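
/*
 * Destroy notifications are sent from process context via a dedicated
 * workqueue, since sock_diag_broadcast_destroy() can be called from
 * interrupt context where building and multicasting a netlink message
 * with GFP_KERNEL is not possible.
 */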
struct broadcast_sk {
	struct sock *sk;
	struct work_struct work;
};

static size_t sock_diag_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
	       + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
	       + nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}

static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
	struct broadcast_sk *bsk =
		container_of(work, struct broadcast_sk, work);
	struct sock *sk = bsk->sk;
	const struct sock_diag_handler *hndl;
	struct sk_buff *skb;
	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
	int err = -1;

	WARN_ON(group == SKNLGRP_NONE);

	skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out;

	mutex_lock(&sock_diag_table_mutex);
	hndl = sock_diag_handlers[sk->sk_family];
	if (hndl && hndl->get_info)
		err = hndl->get_info(skb, sk);
	mutex_unlock(&sock_diag_table_mutex);

	if (!err)
		nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
				GFP_KERNEL);
	else
		kfree_skb(skb);
out:
	sk_destruct(sk);
	kfree(bsk);
}

void sock_diag_broadcast_destroy(struct sock *sk)
{
	/* Note, this function is often called from an interrupt context. */
	struct broadcast_sk *bsk =
		kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);

	if (!bsk)
		return sk_destruct(sk);
	bsk->sk = sk;
	INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
	queue_work(broadcast_wq, &bsk->work);
}

void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
	mutex_lock(&sock_diag_table_mutex);
	inet_rcv_compat = fn;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);

void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
	mutex_lock(&sock_diag_table_mutex);
	inet_rcv_compat = NULL;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);
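
/*
 * One diag handler may be registered per address family. The handler table
 * is protected by sock_diag_table_mutex; registering a family twice fails
 * with -EBUSY.
 */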
int sock_diag_register(const struct sock_diag_handler *hndl)
{
	int err = 0;

	if (hndl->family >= AF_MAX)
		return -EINVAL;

	mutex_lock(&sock_diag_table_mutex);
	if (sock_diag_handlers[hndl->family])
		err = -EBUSY;
	else
		sock_diag_handlers[hndl->family] = hndl;
	mutex_unlock(&sock_diag_table_mutex);

	return err;
}
EXPORT_SYMBOL_GPL(sock_diag_register);

void sock_diag_unregister(const struct sock_diag_handler *hnld)
{
	int family = hnld->family;

	if (family >= AF_MAX)
		return;

	mutex_lock(&sock_diag_table_mutex);
	BUG_ON(sock_diag_handlers[family] != hnld);
	sock_diag_handlers[family] = NULL;
	mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
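
/*
 * Common dispatcher for SOCK_DIAG_BY_FAMILY and SOCK_DESTROY: validate and
 * clamp the requested family, load the matching _diag module if no handler
 * is registered yet, then call the handler's dump() or destroy() callback.
 */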
static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int err;
	struct sock_diag_req *req = nlmsg_data(nlh);
	const struct sock_diag_handler *hndl;

	if (nlmsg_len(nlh) < sizeof(*req))
		return -EINVAL;

	if (req->sdiag_family >= AF_MAX)
		return -EINVAL;
	req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);

	if (sock_diag_handlers[req->sdiag_family] == NULL)
		sock_load_diag_module(req->sdiag_family, 0);

	mutex_lock(&sock_diag_table_mutex);
	hndl = sock_diag_handlers[req->sdiag_family];
	if (hndl == NULL)
		err = -ENOENT;
	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
		err = hndl->dump(skb, nlh);
	else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
		err = hndl->destroy(skb, nlh);
	else
		err = -EOPNOTSUPP;
	mutex_unlock(&sock_diag_table_mutex);

	return err;
}

static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	int ret;

	switch (nlh->nlmsg_type) {
	case TCPDIAG_GETSOCK:
	case DCCPDIAG_GETSOCK:
		if (inet_rcv_compat == NULL)
			sock_load_diag_module(AF_INET, 0);

		mutex_lock(&sock_diag_table_mutex);
		if (inet_rcv_compat != NULL)
			ret = inet_rcv_compat(skb, nlh);
		else
			ret = -EOPNOTSUPP;
		mutex_unlock(&sock_diag_table_mutex);

		return ret;
	case SOCK_DIAG_BY_FAMILY:
	case SOCK_DESTROY:
		return __sock_diag_cmd(skb, nlh);
	default:
		return -EINVAL;
	}
}

static DEFINE_MUTEX(sock_diag_mutex);

static void sock_diag_rcv(struct sk_buff *skb)
{
	mutex_lock(&sock_diag_mutex);
	netlink_rcv_skb(skb, &sock_diag_rcv_msg);
	mutex_unlock(&sock_diag_mutex);
}
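
/*
 * When user space binds to one of the destroy multicast groups, make sure
 * the _diag module providing the AF_INET/AF_INET6 handler is loaded, so
 * destroy notifications can be filled in.
 */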
static int sock_diag_bind(struct net *net, int group)
{
	switch (group) {
	case SKNLGRP_INET_TCP_DESTROY:
	case SKNLGRP_INET_UDP_DESTROY:
		if (!sock_diag_handlers[AF_INET])
			sock_load_diag_module(AF_INET, 0);
		break;
	case SKNLGRP_INET6_TCP_DESTROY:
	case SKNLGRP_INET6_UDP_DESTROY:
		if (!sock_diag_handlers[AF_INET6])
			sock_load_diag_module(AF_INET6, 0);
		break;
	}
	return 0;
}

int sock_diag_destroy(struct sock *sk, int err)
{
	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!sk->sk_prot->diag_destroy)
		return -EOPNOTSUPP;

	return sk->sk_prot->diag_destroy(sk, err);
}
EXPORT_SYMBOL_GPL(sock_diag_destroy);
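
/*
 * Each network namespace gets its own NETLINK_SOCK_DIAG kernel socket
 * (net->diag_nlsk). NL_CFG_F_NONROOT_RECV lets unprivileged processes join
 * the multicast groups, while sock_diag_destroy() still requires
 * CAP_NET_ADMIN in the socket's user namespace.
 */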
static int __net_init diag_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= SKNLGRP_MAX,
		.input	= sock_diag_rcv,
		.bind	= sock_diag_bind,
		.flags	= NL_CFG_F_NONROOT_RECV,
	};

	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
	return net->diag_nlsk == NULL ? -ENOMEM : 0;
}

static void __net_exit diag_net_exit(struct net *net)
{
	netlink_kernel_release(net->diag_nlsk);
	net->diag_nlsk = NULL;
}

static struct pernet_operations diag_net_ops = {
	.init = diag_net_init,
	.exit = diag_net_exit,
};

static int __init sock_diag_init(void)
{
	broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
	BUG_ON(!broadcast_wq);

	return register_pernet_subsys(&diag_net_ops);
}
device_initcall(sock_diag_init);