2019-05-27 08:55:01 +02:00
// SPDX-License-Identifier: GPL-2.0-or-later
2020-07-22 17:32:06 +01:00
/* L2TPv3 IP encapsulation support
 *
 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
 */
2012-05-16 09:55:56 +00:00
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2017-02-09 16:15:52 -08:00
# include <asm/ioctls.h>
2010-04-02 06:19:00 +00:00
# include <linux/icmp.h>
# include <linux/module.h>
# include <linux/skbuff.h>
# include <linux/random.h>
# include <linux/socket.h>
# include <linux/l2tp.h>
# include <linux/in.h>
# include <net/sock.h>
# include <net/ip.h>
# include <net/icmp.h>
# include <net/udp.h>
# include <net/inet_common.h>
# include <net/tcp_states.h>
# include <net/protocol.h>
# include <net/xfrm.h>
# include "l2tp_core.h"
struct l2tp_ip_sock {
/* inet_sock has to be the first member of l2tp_ip_sock */
struct inet_sock inet ;
2012-04-29 21:48:48 +00:00
u32 conn_id ;
u32 peer_conn_id ;
2010-04-02 06:19:00 +00:00
} ;
/* Held (read or write, BH-disabled) around every access to the two socket
 * lists below in this file.
 */
static DEFINE_RWLOCK ( l2tp_ip_lock ) ;
/* Sockets inserted by l2tp_ip_hash(). */
static struct hlist_head l2tp_ip_table ;
/* Sockets inserted by bind()/connect(); searched by __l2tp_ip_bind_lookup(). */
static struct hlist_head l2tp_ip_bind_table ;
/* View a generic socket pointer as the l2tp_ip_sock it belongs to.
 *
 * This is a plain pointer cast; it relies on the requirement, noted on
 * struct l2tp_ip_sock, that the inet_sock is that structure's first member.
 */
static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
{
	struct l2tp_ip_sock *lsk = (struct l2tp_ip_sock *)sk;

	return lsk;
}
2016-12-30 19:48:20 +01:00
/* Search l2tp_ip_bind_table for a socket matching the given parameters.
 *
 * @net:       network namespace the socket must belong to
 * @laddr:     local address; 0 matches any, as does a socket with no
 *             inet_rcv_saddr
 * @raddr:     remote address; 0 matches any, as does a socket with no
 *             inet_daddr
 * @dif:       device index; 0 matches any, as does a socket that is not
 *             bound to a device
 * @tunnel_id: must equal the socket's conn_id exactly
 *
 * Every caller in this file holds l2tp_ip_lock. No reference is taken on
 * the returned socket.
 *
 * Returns the first matching socket, or NULL if there is none.
 */
static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
					  __be32 raddr, int dif, u32 tunnel_id)
{
	struct sock *sk;

	sk_for_each_bound(sk, &l2tp_ip_bind_table) {
		const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
		const struct inet_sock *inet = inet_sk(sk);
		int bound_dev_if;

		if (!net_eq(sock_net(sk), net))
			continue;

		/* Read the bound device once so the test below uses a
		 * single consistent value.
		 */
		bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
		if (bound_dev_if && dif && bound_dev_if != dif)
			continue;

		if (inet->inet_rcv_saddr && laddr &&
		    inet->inet_rcv_saddr != laddr)
			continue;

		if (inet->inet_daddr && raddr && inet->inet_daddr != raddr)
			continue;

		if (l2tp->conn_id != tunnel_id)
			continue;

		return sk;
	}

	return NULL;
}
/* When processing receive frames, there are two cases to
 * consider. Data frames consist of a non-zero session-id and an
 * optional cookie. Control frames consist of a regular L2TP header
 * preceded by 32-bits of zeros.
 *
 * L2TPv3 Session Header Over IP
 *
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                           Session ID                          |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |               Cookie (optional, maximum 64 bits)...
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *                                                                 |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * L2TPv3 Control Message Header Over IP
 *
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                      (32 bits of zeros)                       |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |T|L|x|x|S|x|x|x|x|x|x|x|  Ver  |             Length            |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                     Control Connection ID                     |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |               Ns              |               Nr              |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * All control frames are passed to userspace.
 */
static int l2tp_ip_recv ( struct sk_buff * skb )
{
2013-02-05 14:36:02 -05:00
struct net * net = dev_net ( skb - > dev ) ;
2010-04-02 06:19:00 +00:00
struct sock * sk ;
u32 session_id ;
u32 tunnel_id ;
unsigned char * ptr , * optr ;
struct l2tp_session * session ;
struct l2tp_tunnel * tunnel = NULL ;
2017-11-03 16:49:00 +01:00
struct iphdr * iph ;
2010-04-02 06:19:00 +00:00
if ( ! pskb_may_pull ( skb , 4 ) )
goto discard ;
2016-04-03 22:09:23 +08:00
/* Point to L2TP header */
2020-07-24 16:31:49 +01:00
optr = skb - > data ;
ptr = skb - > data ;
2020-07-22 17:32:05 +01:00
session_id = ntohl ( * ( ( __be32 * ) ptr ) ) ;
2010-04-02 06:19:00 +00:00
ptr + = 4 ;
/* RFC3931: L2TP/IP packets have the first 4 bytes containing
* the session_id . If it is 0 , the packet is a L2TP control
* frame and the session_id value can be discarded .
*/
if ( session_id = = 0 ) {
__skb_pull ( skb , 4 ) ;
goto pass_up ;
}
/* Ok, this is a data packet. Lookup the session. */
2018-08-10 13:21:57 +02:00
session = l2tp_session_get ( net , session_id ) ;
2017-03-31 13:02:25 +02:00
if ( ! session )
2010-04-02 06:19:00 +00:00
goto discard ;
tunnel = session - > tunnel ;
2017-03-31 13:02:25 +02:00
if ( ! tunnel )
goto discard_sess ;
2010-04-02 06:19:00 +00:00
2019-01-30 14:55:14 +08:00
if ( l2tp_v3_ensure_opt_in_linear ( session , skb , & ptr , & optr ) )
goto discard_sess ;
2018-07-25 14:53:33 +02:00
l2tp_recv_common ( session , skb , ptr , optr , 0 , skb - > len ) ;
2017-03-31 13:02:25 +02:00
l2tp_session_dec_refcount ( session ) ;
2010-04-02 06:19:00 +00:00
return 0 ;
pass_up :
/* Get the tunnel_id from the L2TP header */
if ( ! pskb_may_pull ( skb , 12 ) )
goto discard ;
if ( ( skb - > data [ 0 ] & 0xc0 ) ! = 0xc0 )
goto discard ;
2020-07-22 17:32:05 +01:00
tunnel_id = ntohl ( * ( __be32 * ) & skb - > data [ 4 ] ) ;
2017-11-03 16:49:00 +01:00
iph = ( struct iphdr * ) skb_network_header ( skb ) ;
read_lock_bh ( & l2tp_ip_lock ) ;
sk = __l2tp_ip_bind_lookup ( net , iph - > daddr , iph - > saddr , inet_iif ( skb ) ,
tunnel_id ) ;
if ( ! sk ) {
2010-04-02 06:19:00 +00:00
read_unlock_bh ( & l2tp_ip_lock ) ;
2017-11-03 16:49:00 +01:00
goto discard ;
2010-04-02 06:19:00 +00:00
}
2017-11-03 16:49:00 +01:00
sock_hold ( sk ) ;
read_unlock_bh ( & l2tp_ip_lock ) ;
2010-04-02 06:19:00 +00:00
if ( ! xfrm4_policy_check ( sk , XFRM_POLICY_IN , skb ) )
goto discard_put ;
2019-09-29 20:54:03 +02:00
nf_reset_ct ( skb ) ;
2010-04-02 06:19:00 +00:00
return sk_receive_skb ( sk , skb , 1 ) ;
2017-03-31 13:02:25 +02:00
discard_sess :
l2tp_session_dec_refcount ( session ) ;
goto discard ;
2010-04-02 06:19:00 +00:00
discard_put :
sock_put ( sk ) ;
discard :
kfree_skb ( skb ) ;
return 0 ;
}
2020-05-29 11:20:53 -07:00
static int l2tp_ip_hash ( struct sock * sk )
2010-04-02 06:19:00 +00:00
{
2020-05-29 11:20:53 -07:00
if ( sk_unhashed ( sk ) ) {
write_lock_bh ( & l2tp_ip_lock ) ;
sk_add_node ( sk , & l2tp_ip_table ) ;
write_unlock_bh ( & l2tp_ip_lock ) ;
}
return 0 ;
}
2010-04-02 06:19:00 +00:00
2020-05-29 11:20:53 -07:00
static void l2tp_ip_unhash ( struct sock * sk )
{
if ( sk_unhashed ( sk ) )
return ;
2010-04-02 06:19:00 +00:00
write_lock_bh ( & l2tp_ip_lock ) ;
2020-05-29 11:20:53 -07:00
sk_del_node_init ( sk ) ;
2010-04-02 06:19:00 +00:00
write_unlock_bh ( & l2tp_ip_lock ) ;
2020-05-29 11:20:53 -07:00
}
static int l2tp_ip_open ( struct sock * sk )
{
/* Prevent autobind. We don't have ports. */
inet_sk ( sk ) - > inet_num = IPPROTO_L2TP ;
2010-04-02 06:19:00 +00:00
2020-05-29 11:20:53 -07:00
l2tp_ip_hash ( sk ) ;
2010-04-02 06:19:00 +00:00
return 0 ;
}
static void l2tp_ip_close ( struct sock * sk , long timeout )
{
write_lock_bh ( & l2tp_ip_lock ) ;
hlist_del_init ( & sk - > sk_bind_node ) ;
2012-04-10 00:10:42 +00:00
sk_del_node_init ( sk ) ;
2010-04-02 06:19:00 +00:00
write_unlock_bh ( & l2tp_ip_lock ) ;
sk_common_release ( sk ) ;
}
static void l2tp_ip_destroy_sock ( struct sock * sk )
{
2020-09-03 09:54:51 +01:00
struct l2tp_tunnel * tunnel = l2tp_sk_to_tunnel ( sk ) ;
2010-04-02 06:19:00 +00:00
struct sk_buff * skb ;
while ( ( skb = __skb_dequeue_tail ( & sk - > sk_write_queue ) ) ! = NULL )
kfree_skb ( skb ) ;
l2tp: fix races with tunnel socket close
The tunnel socket tunnel->sock (struct sock) is accessed when
preparing a new ppp session on a tunnel at pppol2tp_session_init. If
the socket is closed by a thread while another is creating a new
session, the threads race. In pppol2tp_connect, the tunnel object may
be created if the pppol2tp socket is associated with the special
session_id 0 and the tunnel socket is looked up using the provided
fd. When handling this, pppol2tp_connect cannot sock_hold the tunnel
socket to prevent it being destroyed during pppol2tp_connect since
this may itself may race with the socket being destroyed. Doing
sockfd_lookup in pppol2tp_connect isn't sufficient to prevent
tunnel->sock going away either because a given tunnel socket fd may be
reused between calls to pppol2tp_connect. Instead, have
l2tp_tunnel_create sock_hold the tunnel socket before it does
sockfd_put. This ensures that the tunnel's socket is always extant
while the tunnel object exists. Hold a ref on the socket until the
tunnel is destroyed and ensure that all tunnel destroy paths go
through a common function (l2tp_tunnel_delete) since this will do the
final sock_put to release the tunnel socket.
Since the tunnel's socket is now guaranteed to exist if the tunnel
exists, we no longer need to use sockfd_lookup via l2tp_sock_to_tunnel
to derive the tunnel from the socket since this is always
sk_user_data.
Also, sessions no longer sock_hold the tunnel socket since sessions
already hold a tunnel ref and the tunnel sock will not be freed until
the tunnel is freed. Removing these sock_holds in
l2tp_session_register avoids a possible sock leak in the
pppol2tp_connect error path if l2tp_session_register succeeds but
attaching a ppp channel fails. The pppol2tp_connect error path could
have been fixed instead and have the sock ref dropped when the session
is freed, but doing a sock_put of the tunnel socket when the session
is freed would require a new session_free callback. It is simpler to
just remove the sock_hold of the tunnel socket in
l2tp_session_register, now that the tunnel socket lifetime is
guaranteed.
Finally, some init code in l2tp_tunnel_create is reordered to ensure
that the new tunnel object's refcount is set and the tunnel socket ref
is taken before the tunnel socket destructor callbacks are set.
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 0 PID: 4360 Comm: syzbot_19c09769 Not tainted 4.16.0-rc2+ #34
Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
RIP: 0010:pppol2tp_session_init+0x1d6/0x500
RSP: 0018:ffff88001377fb40 EFLAGS: 00010212
RAX: dffffc0000000000 RBX: ffff88001636a940 RCX: ffffffff84836c1d
RDX: 0000000000000045 RSI: 0000000055976744 RDI: 0000000000000228
RBP: ffff88001377fb60 R08: ffffffff84836bc8 R09: 0000000000000002
R10: ffff88001377fab8 R11: 0000000000000001 R12: 0000000000000000
R13: ffff88001636aac8 R14: ffff8800160f81c0 R15: 1ffff100026eff76
FS: 00007ffb3ea66700(0000) GS:ffff88001a400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020e77000 CR3: 0000000016261000 CR4: 00000000000006f0
Call Trace:
pppol2tp_connect+0xd18/0x13c0
? pppol2tp_session_create+0x170/0x170
? __might_fault+0x115/0x1d0
? lock_downgrade+0x860/0x860
? __might_fault+0xe5/0x1d0
? security_socket_connect+0x8e/0xc0
SYSC_connect+0x1b6/0x310
? SYSC_bind+0x280/0x280
? __do_page_fault+0x5d1/0xca0
? up_read+0x1f/0x40
? __do_page_fault+0x3c8/0xca0
SyS_connect+0x29/0x30
? SyS_accept+0x40/0x40
do_syscall_64+0x1e0/0x730
? trace_hardirqs_off_thunk+0x1a/0x1c
entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7ffb3e376259
RSP: 002b:00007ffeda4f6508 EFLAGS: 00000202 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 0000000020e77012 RCX: 00007ffb3e376259
RDX: 000000000000002e RSI: 0000000020e77000 RDI: 0000000000000004
RBP: 00007ffeda4f6540 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000400b60
R13: 00007ffeda4f6660 R14: 0000000000000000 R15: 0000000000000000
Code: 80 3d b0 ff 06 02 00 0f 84 07 02 00 00 e8 13 d6 db fc 49 8d bc 24 28 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 f
a 48 c1 ea 03 <80> 3c 02 00 0f 85 ed 02 00 00 4d 8b a4 24 28 02 00 00 e8 13 16
Fixes: 80d84ef3ff1dd ("l2tp: prevent l2tp_tunnel_delete racing with userspace close")
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-23 17:45:45 +00:00
if ( tunnel )
l2tp_tunnel_delete ( tunnel ) ;
2010-04-02 06:19:00 +00:00
}
static int l2tp_ip_bind ( struct sock * sk , struct sockaddr * uaddr , int addr_len )
{
struct inet_sock * inet = inet_sk ( sk ) ;
2020-07-22 17:32:05 +01:00
struct sockaddr_l2tpip * addr = ( struct sockaddr_l2tpip * ) uaddr ;
2013-02-05 14:36:02 -05:00
struct net * net = sock_net ( sk ) ;
2012-05-29 03:30:42 +00:00
int ret ;
2010-04-02 06:19:00 +00:00
int chk_addr_ret ;
2012-05-29 03:30:42 +00:00
if ( addr_len < sizeof ( struct sockaddr_l2tpip ) )
return - EINVAL ;
if ( addr - > l2tp_family ! = AF_INET )
return - EINVAL ;
2010-04-02 06:19:00 +00:00
lock_sock ( sk ) ;
l2tp: fix racy socket lookup in l2tp_ip and l2tp_ip6 bind()
It's not enough to check for sockets bound to same address at the
beginning of l2tp_ip{,6}_bind(): even if no socket is found at that
time, a socket with the same address could be bound before we take
the l2tp lock again.
This patch moves the lookup right before inserting the new socket, so
that no change can ever happen to the list between address lookup and
socket insertion.
Care is taken to avoid side effects on the socket in case of failure.
That is, modifications of the socket are done after the lookup, when
binding is guaranteed to succeed, and before releasing the l2tp lock,
so that concurrent lookups will always see fully initialised sockets.
For l2tp_ip, 'ret' is set to -EINVAL before checking the SOCK_ZAPPED
bit. Error code was mistakenly set to -EADDRINUSE on error by commit
32c231164b76 ("l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()").
Using -EINVAL restores original behaviour.
For l2tp_ip6, the lookup is now always done with the correct bound
device. Before this patch, when binding to a link-local address, the
lookup was done with the original sk->sk_bound_dev_if, which was later
overwritten with addr->l2tp_scope_id. Lookup is now performed with the
final sk->sk_bound_dev_if value.
Finally, the (addr_len >= sizeof(struct sockaddr_in6)) check has been
dropped: addr is a sockaddr_l2tpip6 not sockaddr_in6 and addr_len has
already been checked at this point (this part of the code seems to have
been copy-pasted from net/ipv6/raw.c).
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 13:09:46 +01:00
ret = - EINVAL ;
2016-11-18 22:13:00 +01:00
if ( ! sock_flag ( sk , SOCK_ZAPPED ) )
goto out ;
2017-01-06 20:03:54 +01:00
if ( sk - > sk_state ! = TCP_CLOSE )
2010-04-02 06:19:00 +00:00
goto out ;
2013-02-05 14:36:02 -05:00
chk_addr_ret = inet_addr_type ( net , addr - > l2tp_addr . s_addr ) ;
2010-04-02 06:19:00 +00:00
ret = - EADDRNOTAVAIL ;
if ( addr - > l2tp_addr . s_addr & & chk_addr_ret ! = RTN_LOCAL & &
chk_addr_ret ! = RTN_MULTICAST & & chk_addr_ret ! = RTN_BROADCAST )
goto out ;
2020-07-24 16:31:49 +01:00
if ( addr - > l2tp_addr . s_addr ) {
inet - > inet_rcv_saddr = addr - > l2tp_addr . s_addr ;
inet - > inet_saddr = addr - > l2tp_addr . s_addr ;
}
2010-04-02 06:19:00 +00:00
if ( chk_addr_ret = = RTN_MULTICAST | | chk_addr_ret = = RTN_BROADCAST )
inet - > inet_saddr = 0 ; /* Use device */
l2tp: fix racy socket lookup in l2tp_ip and l2tp_ip6 bind()
It's not enough to check for sockets bound to same address at the
beginning of l2tp_ip{,6}_bind(): even if no socket is found at that
time, a socket with the same address could be bound before we take
the l2tp lock again.
This patch moves the lookup right before inserting the new socket, so
that no change can ever happen to the list between address lookup and
socket insertion.
Care is taken to avoid side effects on the socket in case of failure.
That is, modifications of the socket are done after the lookup, when
binding is guaranteed to succeed, and before releasing the l2tp lock,
so that concurrent lookups will always see fully initialised sockets.
For l2tp_ip, 'ret' is set to -EINVAL before checking the SOCK_ZAPPED
bit. Error code was mistakenly set to -EADDRINUSE on error by commit
32c231164b76 ("l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()").
Using -EINVAL restores original behaviour.
For l2tp_ip6, the lookup is now always done with the correct bound
device. Before this patch, when binding to a link-local address, the
lookup was done with the original sk->sk_bound_dev_if, which was later
overwritten with addr->l2tp_scope_id. Lookup is now performed with the
final sk->sk_bound_dev_if value.
Finally, the (addr_len >= sizeof(struct sockaddr_in6)) check has been
dropped: addr is a sockaddr_l2tpip6 not sockaddr_in6 and addr_len has
already been checked at this point (this part of the code seems to have
been copy-pasted from net/ipv6/raw.c).
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 13:09:46 +01:00
write_lock_bh ( & l2tp_ip_lock ) ;
2016-12-30 19:48:20 +01:00
if ( __l2tp_ip_bind_lookup ( net , addr - > l2tp_addr . s_addr , 0 ,
l2tp: fix racy socket lookup in l2tp_ip and l2tp_ip6 bind()
It's not enough to check for sockets bound to same address at the
beginning of l2tp_ip{,6}_bind(): even if no socket is found at that
time, a socket with the same address could be bound before we take
the l2tp lock again.
This patch moves the lookup right before inserting the new socket, so
that no change can ever happen to the list between address lookup and
socket insertion.
Care is taken to avoid side effects on the socket in case of failure.
That is, modifications of the socket are done after the lookup, when
binding is guaranteed to succeed, and before releasing the l2tp lock,
so that concurrent lookups will always see fully initialised sockets.
For l2tp_ip, 'ret' is set to -EINVAL before checking the SOCK_ZAPPED
bit. Error code was mistakenly set to -EADDRINUSE on error by commit
32c231164b76 ("l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()").
Using -EINVAL restores original behaviour.
For l2tp_ip6, the lookup is now always done with the correct bound
device. Before this patch, when binding to a link-local address, the
lookup was done with the original sk->sk_bound_dev_if, which was later
overwritten with addr->l2tp_scope_id. Lookup is now performed with the
final sk->sk_bound_dev_if value.
Finally, the (addr_len >= sizeof(struct sockaddr_in6)) check has been
dropped: addr is a sockaddr_l2tpip6 not sockaddr_in6 and addr_len has
already been checked at this point (this part of the code seems to have
been copy-pasted from net/ipv6/raw.c).
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 13:09:46 +01:00
sk - > sk_bound_dev_if , addr - > l2tp_conn_id ) ) {
write_unlock_bh ( & l2tp_ip_lock ) ;
ret = - EADDRINUSE ;
goto out ;
}
sk_dst_reset ( sk ) ;
2010-04-02 06:19:00 +00:00
l2tp_ip_sk ( sk ) - > conn_id = addr - > l2tp_conn_id ;
sk_add_bind_node ( sk , & l2tp_ip_bind_table ) ;
sk_del_node_init ( sk ) ;
write_unlock_bh ( & l2tp_ip_lock ) ;
l2tp: fix racy socket lookup in l2tp_ip and l2tp_ip6 bind()
It's not enough to check for sockets bound to same address at the
beginning of l2tp_ip{,6}_bind(): even if no socket is found at that
time, a socket with the same address could be bound before we take
the l2tp lock again.
This patch moves the lookup right before inserting the new socket, so
that no change can ever happen to the list between address lookup and
socket insertion.
Care is taken to avoid side effects on the socket in case of failure.
That is, modifications of the socket are done after the lookup, when
binding is guaranteed to succeed, and before releasing the l2tp lock,
so that concurrent lookups will always see fully initialised sockets.
For l2tp_ip, 'ret' is set to -EINVAL before checking the SOCK_ZAPPED
bit. Error code was mistakenly set to -EADDRINUSE on error by commit
32c231164b76 ("l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind()").
Using -EINVAL restores original behaviour.
For l2tp_ip6, the lookup is now always done with the correct bound
device. Before this patch, when binding to a link-local address, the
lookup was done with the original sk->sk_bound_dev_if, which was later
overwritten with addr->l2tp_scope_id. Lookup is now performed with the
final sk->sk_bound_dev_if value.
Finally, the (addr_len >= sizeof(struct sockaddr_in6)) check has been
dropped: addr is a sockaddr_l2tpip6 not sockaddr_in6 and addr_len has
already been checked at this point (this part of the code seems to have
been copy-pasted from net/ipv6/raw.c).
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-29 13:09:46 +01:00
2010-04-02 06:19:00 +00:00
ret = 0 ;
2012-05-29 03:30:42 +00:00
sock_reset_flag ( sk , SOCK_ZAPPED ) ;
2010-04-02 06:19:00 +00:00
out :
release_sock ( sk ) ;
return ret ;
}
static int l2tp_ip_connect ( struct sock * sk , struct sockaddr * uaddr , int addr_len )
{
2020-07-22 17:32:05 +01:00
struct sockaddr_l2tpip * lsa = ( struct sockaddr_l2tpip * ) uaddr ;
2012-04-29 21:48:47 +00:00
int rc ;
2010-04-02 06:19:00 +00:00
if ( addr_len < sizeof ( * lsa ) )
2012-04-29 21:48:47 +00:00
return - EINVAL ;
2011-05-08 13:39:01 -07:00
2010-04-02 06:19:00 +00:00
if ( ipv4_is_multicast ( lsa - > l2tp_addr . s_addr ) )
2012-04-29 21:48:47 +00:00
return - EINVAL ;
2010-04-02 06:19:00 +00:00
2012-04-29 21:48:47 +00:00
lock_sock ( sk ) ;
2010-04-02 06:19:00 +00:00
2016-11-29 13:09:44 +01:00
/* Must bind first - autobinding does not work */
if ( sock_flag ( sk , SOCK_ZAPPED ) ) {
rc = - EINVAL ;
goto out_sk ;
}
rc = __ip4_datagram_connect ( sk , uaddr , addr_len ) ;
if ( rc < 0 )
goto out_sk ;
2010-04-02 06:19:00 +00:00
l2tp_ip_sk ( sk ) - > peer_conn_id = lsa - > l2tp_conn_id ;
write_lock_bh ( & l2tp_ip_lock ) ;
hlist_del_init ( & sk - > sk_bind_node ) ;
sk_add_bind_node ( sk , & l2tp_ip_bind_table ) ;
write_unlock_bh ( & l2tp_ip_lock ) ;
2016-11-29 13:09:44 +01:00
out_sk :
2011-05-08 13:39:01 -07:00
release_sock ( sk ) ;
2016-11-29 13:09:44 +01:00
2010-04-02 06:19:00 +00:00
return rc ;
}
2012-05-29 03:30:42 +00:00
/* Disconnect an L2TP/IP socket.
 *
 * Returns 0 without doing anything if the socket still has SOCK_ZAPPED
 * set; otherwise returns the result of __udp_disconnect().
 */
static int l2tp_ip_disconnect(struct sock *sk, int flags)
{
	if (sock_flag(sk, SOCK_ZAPPED))
		return 0;

	return __udp_disconnect(sk, flags);
}
2010-04-02 06:19:00 +00:00
static int l2tp_ip_getname ( struct socket * sock , struct sockaddr * uaddr ,
2018-02-12 20:00:20 +01:00
int peer )
2010-04-02 06:19:00 +00:00
{
struct sock * sk = sock - > sk ;
struct inet_sock * inet = inet_sk ( sk ) ;
struct l2tp_ip_sock * lsk = l2tp_ip_sk ( sk ) ;
struct sockaddr_l2tpip * lsa = ( struct sockaddr_l2tpip * ) uaddr ;
memset ( lsa , 0 , sizeof ( * lsa ) ) ;
lsa - > l2tp_family = AF_INET ;
if ( peer ) {
if ( ! inet - > inet_dport )
return - ENOTCONN ;
lsa - > l2tp_conn_id = lsk - > peer_conn_id ;
lsa - > l2tp_addr . s_addr = inet - > inet_daddr ;
} else {
__be32 addr = inet - > inet_rcv_saddr ;
2020-07-22 17:32:05 +01:00
2010-04-02 06:19:00 +00:00
if ( ! addr )
addr = inet - > inet_saddr ;
lsa - > l2tp_conn_id = lsk - > conn_id ;
lsa - > l2tp_addr . s_addr = addr ;
}
2018-02-12 20:00:20 +01:00
return sizeof ( * lsa ) ;
2010-04-02 06:19:00 +00:00
}
static int l2tp_ip_backlog_recv ( struct sock * sk , struct sk_buff * skb )
{
int rc ;
/* Charge it to the socket, dropping if the queue is full. */
rc = sock_queue_rcv_skb ( sk , skb ) ;
if ( rc < 0 )
goto drop ;
return 0 ;
drop :
2013-02-05 14:36:02 -05:00
IP_INC_STATS ( sock_net ( sk ) , IPSTATS_MIB_INDISCARDS ) ;
2010-04-02 06:19:00 +00:00
kfree_skb ( skb ) ;
2017-02-26 17:58:19 +01:00
return 0 ;
2010-04-02 06:19:00 +00:00
}
/* Userspace will call sendmsg() on the tunnel socket to send L2TP
 * control frames.
 */
2015-03-02 15:37:48 +08:00
static int l2tp_ip_sendmsg ( struct sock * sk , struct msghdr * msg , size_t len )
2010-04-02 06:19:00 +00:00
{
struct sk_buff * skb ;
int rc ;
struct inet_sock * inet = inet_sk ( sk ) ;
struct rtable * rt = NULL ;
2011-05-08 13:48:37 -07:00
struct flowi4 * fl4 ;
2010-04-02 06:19:00 +00:00
int connected = 0 ;
__be32 daddr ;
2011-05-08 13:39:01 -07:00
lock_sock ( sk ) ;
rc = - ENOTCONN ;
2010-04-02 06:19:00 +00:00
if ( sock_flag ( sk , SOCK_DEAD ) )
2011-05-08 13:39:01 -07:00
goto out ;
2010-04-02 06:19:00 +00:00
/* Get and verify the address. */
if ( msg - > msg_name ) {
2014-01-17 22:53:15 +01:00
DECLARE_SOCKADDR ( struct sockaddr_l2tpip * , lip , msg - > msg_name ) ;
2020-07-22 17:32:05 +01:00
2011-05-08 13:39:01 -07:00
rc = - EINVAL ;
2010-04-02 06:19:00 +00:00
if ( msg - > msg_namelen < sizeof ( * lip ) )
2011-05-08 13:39:01 -07:00
goto out ;
2010-04-02 06:19:00 +00:00
if ( lip - > l2tp_family ! = AF_INET ) {
2011-05-08 13:39:01 -07:00
rc = - EAFNOSUPPORT ;
2010-04-02 06:19:00 +00:00
if ( lip - > l2tp_family ! = AF_UNSPEC )
2011-05-08 13:39:01 -07:00
goto out ;
2010-04-02 06:19:00 +00:00
}
daddr = lip - > l2tp_addr . s_addr ;
} else {
2012-05-02 03:58:43 +00:00
rc = - EDESTADDRREQ ;
2010-04-02 06:19:00 +00:00
if ( sk - > sk_state ! = TCP_ESTABLISHED )
2012-05-02 03:58:43 +00:00
goto out ;
2010-04-02 06:19:00 +00:00
daddr = inet - > inet_daddr ;
connected = 1 ;
}
/* Allocate a socket buffer */
rc = - ENOMEM ;
skb = sock_wmalloc ( sk , 2 + NET_SKB_PAD + sizeof ( struct iphdr ) +
4 + len , 0 , GFP_KERNEL ) ;
if ( ! skb )
goto error ;
/* Reserve space for headers, putting IP header on 4-byte boundary. */
skb_reserve ( skb , 2 + NET_SKB_PAD ) ;
skb_reset_network_header ( skb ) ;
skb_reserve ( skb , sizeof ( struct iphdr ) ) ;
skb_reset_transport_header ( skb ) ;
/* Insert 0 session_id */
2020-07-22 17:32:05 +01:00
* ( ( __be32 * ) skb_put ( skb , 4 ) ) = 0 ;
2010-04-02 06:19:00 +00:00
/* Copy user data into skb */
2014-04-06 21:25:44 -04:00
rc = memcpy_from_msg ( skb_put ( skb , len ) , msg , len ) ;
2010-04-02 06:19:00 +00:00
if ( rc < 0 ) {
kfree_skb ( skb ) ;
goto error ;
}
2011-05-08 13:48:37 -07:00
fl4 = & inet - > cork . fl . u . ip4 ;
2010-04-02 06:19:00 +00:00
if ( connected )
2020-07-22 17:32:05 +01:00
rt = ( struct rtable * ) __sk_dst_check ( sk , 0 ) ;
2010-04-02 06:19:00 +00:00
2011-06-11 22:27:09 +00:00
rcu_read_lock ( ) ;
2020-07-23 12:29:50 +01:00
if ( ! rt ) {
2011-06-11 22:27:09 +00:00
const struct ip_options_rcu * inet_opt ;
2011-04-21 09:45:37 +00:00
2011-04-28 13:54:06 -07:00
inet_opt = rcu_dereference ( inet - > inet_opt ) ;
2011-04-21 09:45:37 +00:00
2010-04-02 06:19:00 +00:00
/* Use correct destination address if we have options. */
2011-04-21 09:45:37 +00:00
if ( inet_opt & & inet_opt - > opt . srr )
daddr = inet_opt - > opt . faddr ;
2010-04-02 06:19:00 +00:00
2011-03-12 00:00:52 -05:00
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
* itself out .
*/
2011-05-08 13:48:37 -07:00
rt = ip_route_output_ports ( sock_net ( sk ) , fl4 , sk ,
2011-03-12 00:00:52 -05:00
daddr , inet - > inet_saddr ,
inet - > inet_dport , inet - > inet_sport ,
ipv4: Set the routing scope properly in ip_route_output_ports().
Set scope automatically in ip_route_output_ports() (using the socket
SOCK_LOCALROUTE flag). This way, callers don't have to overload the
tos with the RTO_ONLINK flag, like RT_CONN_FLAGS() does.
For callers that don't pass a struct sock, this doesn't change anything
as the scope is still set to RT_SCOPE_UNIVERSE when sk is NULL.
Callers that passed a struct sock and used RT_CONN_FLAGS(sk) or
RT_CONN_FLAGS_TOS(sk, tos) for the tos are modified to use
ip_sock_tos(sk) and RT_TOS(tos) respectively, as overloading tos with
the RTO_ONLINK flag now becomes unnecessary.
In drivers/net/amt.c, all ip_route_output_ports() calls use a 0 tos
parameter, ignoring the SOCK_LOCALROUTE flag of the socket. But the sk
parameter is a kernel socket, which doesn't have any configuration path
for setting SOCK_LOCALROUTE anyway. Therefore, ip_route_output_ports()
will continue to initialise scope with RT_SCOPE_UNIVERSE and amt.c
doesn't need to be modified.
Also, remove RT_CONN_FLAGS() and RT_CONN_FLAGS_TOS() from route.h as
these macros are now unused.
The objective is to eventually remove RTO_ONLINK entirely to allow
converting ->flowi4_tos to dscp_t. This will ensure proper isolation
between the DSCP and ECN bits, thus minimising the risk of introducing
bugs where TOS values interfere with ECN.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/dacfd2ab40685e20959ab7b53c427595ba229e7d.1707496938.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2024-02-09 17:43:37 +01:00
sk - > sk_protocol , ip_sock_rt_tos ( sk ) ,
2011-03-12 00:00:52 -05:00
sk - > sk_bound_dev_if ) ;
if ( IS_ERR ( rt ) )
goto no_route ;
2012-06-08 06:25:00 +00:00
if ( connected ) {
2011-06-11 22:27:09 +00:00
sk_setup_caps ( sk , & rt - > dst ) ;
2012-06-08 06:25:00 +00:00
} else {
skb_dst_set ( skb , & rt - > dst ) ;
goto xmit ;
}
2010-04-02 06:19:00 +00:00
}
2011-06-11 22:27:09 +00:00
2021-06-07 23:01:37 +08:00
/* We don't need to clone dst here, it is guaranteed to not disappear.
2011-06-11 22:27:09 +00:00
* __dev_xmit_skb ( ) might force a refcount if needed .
*/
skb_dst_set_noref ( skb , & rt - > dst ) ;
2010-04-02 06:19:00 +00:00
2012-06-08 06:25:00 +00:00
xmit :
2010-04-02 06:19:00 +00:00
/* Queue the packet to IP for output */
2014-04-15 12:58:34 -04:00
rc = ip_queue_xmit ( sk , skb , & inet - > cork . fl ) ;
2011-06-11 22:27:09 +00:00
rcu_read_unlock ( ) ;
2010-04-02 06:19:00 +00:00
error :
2012-04-29 21:48:48 +00:00
if ( rc > = 0 )
2010-04-02 06:19:00 +00:00
rc = len ;
2011-05-08 13:39:01 -07:00
out :
release_sock ( sk ) ;
2010-04-02 06:19:00 +00:00
return rc ;
no_route :
2011-06-11 22:27:09 +00:00
rcu_read_unlock ( ) ;
2010-04-02 06:19:00 +00:00
IP_INC_STATS ( sock_net ( sk ) , IPSTATS_MIB_OUTNOROUTES ) ;
kfree_skb ( skb ) ;
2011-05-08 13:39:01 -07:00
rc = - EHOSTUNREACH ;
goto out ;
2010-04-02 06:19:00 +00:00
}
2015-03-02 15:37:48 +08:00
static int l2tp_ip_recvmsg ( struct sock * sk , struct msghdr * msg ,
net: remove noblock parameter from recvmsg() entities
The internal recvmsg() functions have two parameters 'flags' and 'noblock'
that were merged inside skb_recv_datagram(). As a follow up patch to commit
f4b41f062c42 ("net: remove noblock parameter from skb_recv_datagram()")
this patch removes the separate 'noblock' parameter for recvmsg().
Analogue to the referenced patch for skb_recv_datagram() the 'flags' and
'noblock' parameters are unnecessarily split up with e.g.
err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
or in
err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, &addr_len);
instead of simply using only flags all the time and check for MSG_DONTWAIT
where needed (to preserve for the formerly separated no(n)block condition).
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/r/20220411124955.154876-1-socketcan@hartkopp.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2022-04-11 14:49:55 +02:00
size_t len , int flags , int * addr_len )
2010-04-02 06:19:00 +00:00
{
struct inet_sock * inet = inet_sk ( sk ) ;
size_t copied = 0 ;
int err = - EOPNOTSUPP ;
2014-01-17 22:53:15 +01:00
DECLARE_SOCKADDR ( struct sockaddr_in * , sin , msg - > msg_name ) ;
2010-04-02 06:19:00 +00:00
struct sk_buff * skb ;
if ( flags & MSG_OOB )
goto out ;
2022-04-04 18:30:22 +02:00
skb = skb_recv_datagram ( sk , flags , & err ) ;
2010-04-02 06:19:00 +00:00
if ( ! skb )
goto out ;
copied = skb - > len ;
if ( len < copied ) {
msg - > msg_flags | = MSG_TRUNC ;
copied = len ;
}
2014-11-05 16:46:40 -05:00
err = skb_copy_datagram_msg ( skb , 0 , msg , copied ) ;
2010-04-02 06:19:00 +00:00
if ( err )
goto done ;
sock_recv_timestamp ( msg , sk , skb ) ;
/* Copy the address. */
if ( sin ) {
sin - > sin_family = AF_INET ;
sin - > sin_addr . s_addr = ip_hdr ( skb ) - > saddr ;
sin - > sin_port = 0 ;
memset ( & sin - > sin_zero , 0 , sizeof ( sin - > sin_zero ) ) ;
2013-11-18 04:20:45 +01:00
* addr_len = sizeof ( * sin ) ;
2010-04-02 06:19:00 +00:00
}
2023-08-16 08:15:33 +00:00
if ( inet_cmsg_flags ( inet ) )
2010-04-02 06:19:00 +00:00
ip_cmsg_recv ( msg , skb ) ;
if ( flags & MSG_TRUNC )
copied = skb - > len ;
done :
skb_free_datagram ( sk , skb ) ;
out :
2012-04-29 21:48:48 +00:00
return err ? err : copied ;
2010-04-02 06:19:00 +00:00
}
net: ioctl: Use kernel memory on protocol ioctl callbacks
Most of the ioctls to net protocols operates directly on userspace
argument (arg). Usually doing get_user()/put_user() directly in the
ioctl callback. This is not flexible, because it is hard to reuse these
functions without passing userspace buffers.
Change the "struct proto" ioctls to avoid touching userspace memory and
operate on kernel buffers, i.e., all protocol's ioctl callbacks is
adapted to operate on a kernel memory other than on userspace (so, no
more {put,get}_user() and friends being called in the ioctl callback).
This changes the "struct proto" ioctl format in the following way:
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
(Important to say that this patch does not touch the "struct proto_ops"
protocols)
So, the "karg" argument, which is passed to the ioctl callback, is a
pointer allocated to kernel space memory (inside a function wrapper).
This buffer (karg) may contain input argument (copied from userspace in
a prep function) and it might return a value/buffer, which is copied
back to userspace if necessary. There is not one-size-fits-all format
(that is I am using 'may' above), but basically, there are three type of
ioctls:
1) Do not read from userspace, returns a result to userspace
2) Read an input parameter from userspace, and does not return anything
to userspace
3) Read an input from userspace, and return a buffer to userspace.
The default case (1) (where no input parameter is given, and an "int" is
returned to userspace) encompasses more than 90% of the cases, but there
are two other exceptions. Here is a list of exceptions:
* Protocol RAW:
* cmd = SIOCGETVIFCNT:
* input and output = struct sioc_vif_req
* cmd = SIOCGETSGCNT
* input and output = struct sioc_sg_req
* Explanation: for the SIOCGETVIFCNT case, userspace passes the input
argument, which is struct sioc_vif_req. Then the callback populates
the struct, which is copied back to userspace.
* Protocol RAW6:
* cmd = SIOCGETMIFCNT_IN6
* input and output = struct sioc_mif_req6
* cmd = SIOCGETSGCNT_IN6
* input and output = struct sioc_sg_req6
* Protocol PHONET:
* cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE
* input int (4 bytes)
* Nothing is copied back to userspace.
For the exception cases, functions sock_sk_ioctl_inout() will
copy the userspace input, and copy it back to kernel space.
The wrapper that prepare the buffer and put the buffer back to user is
sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now
calls sk_ioctl(), which will handle all cases.
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230609152800.830401-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-06-09 08:27:42 -07:00
int l2tp_ioctl ( struct sock * sk , int cmd , int * karg )
2017-02-09 16:15:52 -08:00
{
struct sk_buff * skb ;
switch ( cmd ) {
case SIOCOUTQ :
net: ioctl: Use kernel memory on protocol ioctl callbacks
Most of the ioctls to net protocols operates directly on userspace
argument (arg). Usually doing get_user()/put_user() directly in the
ioctl callback. This is not flexible, because it is hard to reuse these
functions without passing userspace buffers.
Change the "struct proto" ioctls to avoid touching userspace memory and
operate on kernel buffers, i.e., all protocol's ioctl callbacks is
adapted to operate on a kernel memory other than on userspace (so, no
more {put,get}_user() and friends being called in the ioctl callback).
This changes the "struct proto" ioctl format in the following way:
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
(Important to say that this patch does not touch the "struct proto_ops"
protocols)
So, the "karg" argument, which is passed to the ioctl callback, is a
pointer allocated to kernel space memory (inside a function wrapper).
This buffer (karg) may contain input argument (copied from userspace in
a prep function) and it might return a value/buffer, which is copied
back to userspace if necessary. There is not one-size-fits-all format
(that is I am using 'may' above), but basically, there are three type of
ioctls:
1) Do not read from userspace, returns a result to userspace
2) Read an input parameter from userspace, and does not return anything
to userspace
3) Read an input from userspace, and return a buffer to userspace.
The default case (1) (where no input parameter is given, and an "int" is
returned to userspace) encompasses more than 90% of the cases, but there
are two other exceptions. Here is a list of exceptions:
* Protocol RAW:
* cmd = SIOCGETVIFCNT:
* input and output = struct sioc_vif_req
* cmd = SIOCGETSGCNT
* input and output = struct sioc_sg_req
* Explanation: for the SIOCGETVIFCNT case, userspace passes the input
argument, which is struct sioc_vif_req. Then the callback populates
the struct, which is copied back to userspace.
* Protocol RAW6:
* cmd = SIOCGETMIFCNT_IN6
* input and output = struct sioc_mif_req6
* cmd = SIOCGETSGCNT_IN6
* input and output = struct sioc_sg_req6
* Protocol PHONET:
* cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE
* input int (4 bytes)
* Nothing is copied back to userspace.
For the exception cases, functions sock_sk_ioctl_inout() will
copy the userspace input, and copy it back to kernel space.
The wrapper that prepare the buffer and put the buffer back to user is
sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now
calls sk_ioctl(), which will handle all cases.
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230609152800.830401-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-06-09 08:27:42 -07:00
* karg = sk_wmem_alloc_get ( sk ) ;
2017-02-09 16:15:52 -08:00
break ;
case SIOCINQ :
spin_lock_bh ( & sk - > sk_receive_queue . lock ) ;
skb = skb_peek ( & sk - > sk_receive_queue ) ;
net: ioctl: Use kernel memory on protocol ioctl callbacks
Most of the ioctls to net protocols operates directly on userspace
argument (arg). Usually doing get_user()/put_user() directly in the
ioctl callback. This is not flexible, because it is hard to reuse these
functions without passing userspace buffers.
Change the "struct proto" ioctls to avoid touching userspace memory and
operate on kernel buffers, i.e., all protocol's ioctl callbacks is
adapted to operate on a kernel memory other than on userspace (so, no
more {put,get}_user() and friends being called in the ioctl callback).
This changes the "struct proto" ioctl format in the following way:
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
(Important to say that this patch does not touch the "struct proto_ops"
protocols)
So, the "karg" argument, which is passed to the ioctl callback, is a
pointer allocated to kernel space memory (inside a function wrapper).
This buffer (karg) may contain input argument (copied from userspace in
a prep function) and it might return a value/buffer, which is copied
back to userspace if necessary. There is not one-size-fits-all format
(that is I am using 'may' above), but basically, there are three type of
ioctls:
1) Do not read from userspace, returns a result to userspace
2) Read an input parameter from userspace, and does not return anything
to userspace
3) Read an input from userspace, and return a buffer to userspace.
The default case (1) (where no input parameter is given, and an "int" is
returned to userspace) encompasses more than 90% of the cases, but there
are two other exceptions. Here is a list of exceptions:
* Protocol RAW:
* cmd = SIOCGETVIFCNT:
* input and output = struct sioc_vif_req
* cmd = SIOCGETSGCNT
* input and output = struct sioc_sg_req
* Explanation: for the SIOCGETVIFCNT case, userspace passes the input
argument, which is struct sioc_vif_req. Then the callback populates
the struct, which is copied back to userspace.
* Protocol RAW6:
* cmd = SIOCGETMIFCNT_IN6
* input and output = struct sioc_mif_req6
* cmd = SIOCGETSGCNT_IN6
* input and output = struct sioc_sg_req6
* Protocol PHONET:
* cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE
* input int (4 bytes)
* Nothing is copied back to userspace.
For the exception cases, functions sock_sk_ioctl_inout() will
copy the userspace input, and copy it back to kernel space.
The wrapper that prepare the buffer and put the buffer back to user is
sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now
calls sk_ioctl(), which will handle all cases.
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230609152800.830401-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-06-09 08:27:42 -07:00
* karg = skb ? skb - > len : 0 ;
2017-02-09 16:15:52 -08:00
spin_unlock_bh ( & sk - > sk_receive_queue . lock ) ;
break ;
default :
return - ENOIOCTLCMD ;
}
net: ioctl: Use kernel memory on protocol ioctl callbacks
Most of the ioctls to net protocols operates directly on userspace
argument (arg). Usually doing get_user()/put_user() directly in the
ioctl callback. This is not flexible, because it is hard to reuse these
functions without passing userspace buffers.
Change the "struct proto" ioctls to avoid touching userspace memory and
operate on kernel buffers, i.e., all protocol's ioctl callbacks is
adapted to operate on a kernel memory other than on userspace (so, no
more {put,get}_user() and friends being called in the ioctl callback).
This changes the "struct proto" ioctl format in the following way:
int (*ioctl)(struct sock *sk, int cmd,
- unsigned long arg);
+ int *karg);
(Important to say that this patch does not touch the "struct proto_ops"
protocols)
So, the "karg" argument, which is passed to the ioctl callback, is a
pointer allocated to kernel space memory (inside a function wrapper).
This buffer (karg) may contain input argument (copied from userspace in
a prep function) and it might return a value/buffer, which is copied
back to userspace if necessary. There is not one-size-fits-all format
(that is I am using 'may' above), but basically, there are three type of
ioctls:
1) Do not read from userspace, returns a result to userspace
2) Read an input parameter from userspace, and does not return anything
to userspace
3) Read an input from userspace, and return a buffer to userspace.
The default case (1) (where no input parameter is given, and an "int" is
returned to userspace) encompasses more than 90% of the cases, but there
are two other exceptions. Here is a list of exceptions:
* Protocol RAW:
* cmd = SIOCGETVIFCNT:
* input and output = struct sioc_vif_req
* cmd = SIOCGETSGCNT
* input and output = struct sioc_sg_req
* Explanation: for the SIOCGETVIFCNT case, userspace passes the input
argument, which is struct sioc_vif_req. Then the callback populates
the struct, which is copied back to userspace.
* Protocol RAW6:
* cmd = SIOCGETMIFCNT_IN6
* input and output = struct sioc_mif_req6
* cmd = SIOCGETSGCNT_IN6
* input and output = struct sioc_sg_req6
* Protocol PHONET:
* cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE
* input int (4 bytes)
* Nothing is copied back to userspace.
For the exception cases, functions sock_sk_ioctl_inout() will
copy the userspace input, and copy it back to kernel space.
The wrapper that prepare the buffer and put the buffer back to user is
sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now
calls sk_ioctl(), which will handle all cases.
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20230609152800.830401-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-06-09 08:27:42 -07:00
return 0 ;
2017-02-09 16:15:52 -08:00
}
2020-07-28 18:20:32 +01:00
EXPORT_SYMBOL_GPL ( l2tp_ioctl ) ;
2017-02-09 16:15:52 -08:00
2010-10-21 07:50:46 +00:00
static struct proto l2tp_ip_prot = {
2010-04-02 06:19:00 +00:00
. name = " L2TP/IP " ,
. owner = THIS_MODULE ,
. init = l2tp_ip_open ,
. close = l2tp_ip_close ,
. bind = l2tp_ip_bind ,
. connect = l2tp_ip_connect ,
2012-05-29 03:30:42 +00:00
. disconnect = l2tp_ip_disconnect ,
2017-02-09 16:15:52 -08:00
. ioctl = l2tp_ioctl ,
2010-04-02 06:19:00 +00:00
. destroy = l2tp_ip_destroy_sock ,
. setsockopt = ip_setsockopt ,
. getsockopt = ip_getsockopt ,
. sendmsg = l2tp_ip_sendmsg ,
. recvmsg = l2tp_ip_recvmsg ,
. backlog_rcv = l2tp_ip_backlog_recv ,
2020-05-29 11:20:53 -07:00
. hash = l2tp_ip_hash ,
. unhash = l2tp_ip_unhash ,
2010-04-02 06:19:00 +00:00
. obj_size = sizeof ( struct l2tp_ip_sock ) ,
} ;
static const struct proto_ops l2tp_ip_ops = {
. family = PF_INET ,
. owner = THIS_MODULE ,
. release = inet_release ,
. bind = inet_bind ,
. connect = inet_dgram_connect ,
. socketpair = sock_no_socketpair ,
. accept = sock_no_accept ,
. getname = l2tp_ip_getname ,
2018-06-28 09:43:44 -07:00
. poll = datagram_poll ,
2010-04-02 06:19:00 +00:00
. ioctl = inet_ioctl ,
2019-04-17 22:51:48 +02:00
. gettstamp = sock_gettstamp ,
2010-04-02 06:19:00 +00:00
. listen = sock_no_listen ,
. shutdown = inet_shutdown ,
. setsockopt = sock_common_setsockopt ,
. getsockopt = sock_common_getsockopt ,
. sendmsg = inet_sendmsg ,
. recvmsg = sock_common_recvmsg ,
. mmap = sock_no_mmap ,
} ;
/* Ties the (SOCK_DGRAM, IPPROTO_L2TP) socket type to l2tp_ip_prot and
 * l2tp_ip_ops; registered via inet_register_protosw() in l2tp_ip_init().
 */
static struct inet_protosw l2tp_ip_protosw = {
	.protocol	= IPPROTO_L2TP,
	.type		= SOCK_DGRAM,
	.ops		= &l2tp_ip_ops,
	.prot		= &l2tp_ip_prot,
};
/* Receive hook: l2tp_ip_init() installs this for IPPROTO_L2TP so that
 * matching packets are handed to l2tp_ip_recv().
 */
static struct net_protocol l2tp_ip_protocol __read_mostly = {
	.handler	= l2tp_ip_recv,
};
/* Module entry point.
 *
 * Registers, in order: the socket protocol (l2tp_ip_prot), the
 * IPPROTO_L2TP receive handler (l2tp_ip_protocol) and the socket-type
 * switch entry (l2tp_ip_protosw).  If adding the receive handler fails,
 * the already-registered socket protocol is unregistered again.
 *
 * Returns 0 on success or the error code of the failing registration.
 */
static int __init l2tp_ip_init(void)
{
	int err;

	/* The message must end exactly at the newline; text after '\n'
	 * would start a stray extra log line.
	 */
	pr_info("L2TP IP encapsulation support (L2TPv3)\n");

	err = proto_register(&l2tp_ip_prot, 1);
	if (err)
		goto out;

	err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
	if (err)
		goto out1;

	inet_register_protosw(&l2tp_ip_protosw);
	return 0;

out1:
	proto_unregister(&l2tp_ip_prot);
out:
	return err;
}
/* Module exit point: undo the registrations made by l2tp_ip_init() in
 * reverse order (protosw entry, receive handler, socket protocol).
 */
static void __exit l2tp_ip_exit(void)
{
	inet_unregister_protosw(&l2tp_ip_protosw);
	inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
	proto_unregister(&l2tp_ip_prot);
}
module_init(l2tp_ip_init);
module_exit(l2tp_ip_exit);

/* Metadata strings must be exact: the license ident in particular is
 * compared verbatim, so it must not carry surrounding blanks.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("L2TP over IP");
MODULE_VERSION("1.0");

/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
 * because __stringify doesn't like enums
 */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2);
MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115);