linux/net/sched/act_tunnel_key.c
Vlad Buslov 87750d173c net: sched: act_tunnel_key: fix metadata handling
Tunnel key action params->tcft_enc_metadata is only set when action is
TCA_TUNNEL_KEY_ACT_SET. However, metadata pointer is incorrectly
dereferenced during tunnel key init and release without verifying that
action is if correct type, which causes NULL pointer dereference. Metadata
tunnel dst_cache is also leaked on action overwrite.

Fix metadata handling:
- Verify that metadata pointer is not NULL before dereferencing it in
  tunnel_key_init error handling code.
- Move dst_cache destroy code into tunnel_key_release_params() function
  that is called in both action overwrite and release cases (fixes resource
  leak) and verifies that actions has correct type before dereferencing
  metadata pointer (fixes NULL pointer dereference).

Oops with KASAN enabled during tdc tests execution:

[  261.080482] ==================================================================
[  261.088049] BUG: KASAN: null-ptr-deref in dst_cache_destroy+0x21/0xa0
[  261.094613] Read of size 8 at addr 00000000000000b0 by task tc/2976
[  261.102524] CPU: 14 PID: 2976 Comm: tc Not tainted 5.0.0-rc7+ #157
[  261.108844] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  261.116726] Call Trace:
[  261.119234]  dump_stack+0x9a/0xeb
[  261.122625]  ? dst_cache_destroy+0x21/0xa0
[  261.126818]  ? dst_cache_destroy+0x21/0xa0
[  261.131004]  kasan_report+0x176/0x192
[  261.134752]  ? idr_get_next+0xd0/0x120
[  261.138578]  ? dst_cache_destroy+0x21/0xa0
[  261.142768]  dst_cache_destroy+0x21/0xa0
[  261.146799]  tunnel_key_release+0x3a/0x50 [act_tunnel_key]
[  261.152392]  tcf_action_cleanup+0x2c/0xc0
[  261.156490]  tcf_generic_walker+0x4c2/0x5c0
[  261.160794]  ? tcf_action_dump_1+0x390/0x390
[  261.165163]  ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key]
[  261.170865]  ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key]
[  261.176641]  tca_action_gd+0x600/0xa40
[  261.180482]  ? tca_get_fill.constprop.17+0x200/0x200
[  261.185548]  ? __lock_acquire+0x588/0x1d20
[  261.189741]  ? __lock_acquire+0x588/0x1d20
[  261.193922]  ? mark_held_locks+0x90/0x90
[  261.197944]  ? mark_held_locks+0x90/0x90
[  261.202018]  ? __nla_parse+0xfe/0x190
[  261.205774]  tc_ctl_action+0x218/0x230
[  261.209614]  ? tcf_action_add+0x230/0x230
[  261.213726]  rtnetlink_rcv_msg+0x3a5/0x600
[  261.217910]  ? lock_downgrade+0x2d0/0x2d0
[  261.222006]  ? validate_linkmsg+0x400/0x400
[  261.226278]  ? find_held_lock+0x6d/0xd0
[  261.230200]  ? match_held_lock+0x1b/0x210
[  261.234296]  ? validate_linkmsg+0x400/0x400
[  261.238567]  netlink_rcv_skb+0xc7/0x1f0
[  261.242489]  ? netlink_ack+0x470/0x470
[  261.246319]  ? netlink_deliver_tap+0x1f3/0x5a0
[  261.250874]  netlink_unicast+0x2ae/0x350
[  261.254884]  ? netlink_attachskb+0x340/0x340
[  261.261647]  ? _copy_from_iter_full+0xdd/0x380
[  261.268576]  ? __virt_addr_valid+0xb6/0xf0
[  261.275227]  ? __check_object_size+0x159/0x240
[  261.282184]  netlink_sendmsg+0x4d3/0x630
[  261.288572]  ? netlink_unicast+0x350/0x350
[  261.295132]  ? netlink_unicast+0x350/0x350
[  261.301608]  sock_sendmsg+0x6d/0x80
[  261.307467]  ___sys_sendmsg+0x48e/0x540
[  261.313633]  ? copy_msghdr_from_user+0x210/0x210
[  261.320545]  ? save_stack+0x89/0xb0
[  261.326289]  ? __lock_acquire+0x588/0x1d20
[  261.332605]  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  261.340063]  ? mark_held_locks+0x90/0x90
[  261.346162]  ? do_filp_open+0x138/0x1d0
[  261.352108]  ? may_open_dev+0x50/0x50
[  261.357897]  ? match_held_lock+0x1b/0x210
[  261.364016]  ? __fget_light+0xa6/0xe0
[  261.369840]  ? __sys_sendmsg+0xd2/0x150
[  261.375814]  __sys_sendmsg+0xd2/0x150
[  261.381610]  ? __ia32_sys_shutdown+0x30/0x30
[  261.388026]  ? lock_downgrade+0x2d0/0x2d0
[  261.394182]  ? mark_held_locks+0x1c/0x90
[  261.400230]  ? do_syscall_64+0x1e/0x280
[  261.406172]  do_syscall_64+0x78/0x280
[  261.411932]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  261.419103] RIP: 0033:0x7f28e91a8b87
[  261.424791] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48
[  261.448226] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  261.458183] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87
[  261.467728] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003
[  261.477342] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c
[  261.486970] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001
[  261.496599] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480
[  261.506281] ==================================================================
[  261.516076] Disabling lock debugging due to kernel taint
[  261.523979] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0
[  261.534413] #PF error: [normal kernel read fault]
[  261.541730] PGD 8000000317400067 P4D 8000000317400067 PUD 316878067 PMD 0
[  261.551294] Oops: 0000 [#1] SMP KASAN PTI
[  261.557985] CPU: 14 PID: 2976 Comm: tc Tainted: G    B             5.0.0-rc7+ #157
[  261.568306] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  261.578874] RIP: 0010:dst_cache_destroy+0x21/0xa0
[  261.586413] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40
[  261.611247] RSP: 0018:ffff888316447160 EFLAGS: 00010282
[  261.619564] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071
[  261.629862] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297
[  261.640149] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89
[  261.650467] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0
[  261.660785] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001
[  261.671110] FS:  00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000
[  261.682447] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  261.691491] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0
[  261.701283] Call Trace:
[  261.706374]  tunnel_key_release+0x3a/0x50 [act_tunnel_key]
[  261.714522]  tcf_action_cleanup+0x2c/0xc0
[  261.721208]  tcf_generic_walker+0x4c2/0x5c0
[  261.728074]  ? tcf_action_dump_1+0x390/0x390
[  261.734996]  ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key]
[  261.743247]  ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key]
[  261.751557]  tca_action_gd+0x600/0xa40
[  261.757991]  ? tca_get_fill.constprop.17+0x200/0x200
[  261.765644]  ? __lock_acquire+0x588/0x1d20
[  261.772461]  ? __lock_acquire+0x588/0x1d20
[  261.779266]  ? mark_held_locks+0x90/0x90
[  261.785880]  ? mark_held_locks+0x90/0x90
[  261.792470]  ? __nla_parse+0xfe/0x190
[  261.798738]  tc_ctl_action+0x218/0x230
[  261.805145]  ? tcf_action_add+0x230/0x230
[  261.811760]  rtnetlink_rcv_msg+0x3a5/0x600
[  261.818564]  ? lock_downgrade+0x2d0/0x2d0
[  261.825433]  ? validate_linkmsg+0x400/0x400
[  261.832256]  ? find_held_lock+0x6d/0xd0
[  261.838624]  ? match_held_lock+0x1b/0x210
[  261.845142]  ? validate_linkmsg+0x400/0x400
[  261.851729]  netlink_rcv_skb+0xc7/0x1f0
[  261.857976]  ? netlink_ack+0x470/0x470
[  261.864132]  ? netlink_deliver_tap+0x1f3/0x5a0
[  261.870969]  netlink_unicast+0x2ae/0x350
[  261.877294]  ? netlink_attachskb+0x340/0x340
[  261.883962]  ? _copy_from_iter_full+0xdd/0x380
[  261.890750]  ? __virt_addr_valid+0xb6/0xf0
[  261.897188]  ? __check_object_size+0x159/0x240
[  261.903928]  netlink_sendmsg+0x4d3/0x630
[  261.910112]  ? netlink_unicast+0x350/0x350
[  261.916410]  ? netlink_unicast+0x350/0x350
[  261.922656]  sock_sendmsg+0x6d/0x80
[  261.928257]  ___sys_sendmsg+0x48e/0x540
[  261.934183]  ? copy_msghdr_from_user+0x210/0x210
[  261.940865]  ? save_stack+0x89/0xb0
[  261.946355]  ? __lock_acquire+0x588/0x1d20
[  261.952358]  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  261.959468]  ? mark_held_locks+0x90/0x90
[  261.965248]  ? do_filp_open+0x138/0x1d0
[  261.970910]  ? may_open_dev+0x50/0x50
[  261.976386]  ? match_held_lock+0x1b/0x210
[  261.982210]  ? __fget_light+0xa6/0xe0
[  261.987648]  ? __sys_sendmsg+0xd2/0x150
[  261.993263]  __sys_sendmsg+0xd2/0x150
[  261.998613]  ? __ia32_sys_shutdown+0x30/0x30
[  262.004555]  ? lock_downgrade+0x2d0/0x2d0
[  262.010236]  ? mark_held_locks+0x1c/0x90
[  262.015758]  ? do_syscall_64+0x1e/0x280
[  262.021234]  do_syscall_64+0x78/0x280
[  262.026500]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  262.033207] RIP: 0033:0x7f28e91a8b87
[  262.038421] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48
[  262.060708] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  262.070112] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87
[  262.079087] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003
[  262.088122] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c
[  262.097157] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001
[  262.106207] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480
[  262.115271] Modules linked in: act_tunnel_key act_skbmod act_simple act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 act_csum libcrc32c act_meta_skbtcindex act_meta_skbprio act_meta_mark act_ife ife act_police act_sample psample act_gact veth nfsv3 nfs_acl nfs lockd grace fscache bridge stp llc intel_rapl sb_edac mlx5_ib x86_pkg_temp_thermal sunrpc intel_powerclamp coretemp ib_uverbs kvm_intel ib_core kvm irqbypass mlx5_core crct10dif_pclmul crc32_pclmul crc32c_intel igb ghash_clmulni_intel intel_cstate mlxfw iTCO_wdt devlink intel_uncore iTCO_vendor_support ipmi_ssif ptp mei_me intel_rapl_perf ioatdma joydev pps_core ses mei i2c_i801 pcspkr enclosure lpc_ich dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter pcc_cpufreq ast i2c_algo_bit drm_kms_helper ttm drm mpt3sas raid_class scsi_transport_sas
[  262.204393] CR2: 00000000000000b0
[  262.210390] ---[ end trace 2e41d786f2c7901a ]---
[  262.226790] RIP: 0010:dst_cache_destroy+0x21/0xa0
[  262.234083] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40
[  262.258311] RSP: 0018:ffff888316447160 EFLAGS: 00010282
[  262.266304] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071
[  262.276251] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297
[  262.286208] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89
[  262.296183] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0
[  262.306157] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001
[  262.316139] FS:  00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000
[  262.327146] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  262.335815] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0

Fixes: 41411e2fd6 ("net/sched: act_tunnel_key: Add dst_cache support")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-27 21:31:15 -08:00

629 lines
16 KiB
C

/*
* Copyright (c) 2016, Amir Vadai <amir@vadai.me>
* Copyright (c) 2016, Mellanox Technologies. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <net/geneve.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
int action;
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
skb_dst_drop(skb);
break;
case TCA_TUNNEL_KEY_ACT_SET:
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
break;
default:
WARN_ONCE(1, "Bad tunnel_key action %d.\n",
params->tcft_action);
break;
}
return action;
}
static const struct nla_policy
enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
};
static const struct nla_policy
geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
.len = 128 },
};
static int
tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
int err, data_len, opt_len;
u8 *data;
err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
nla, geneve_opt_policy, extack);
if (err < 0)
return err;
if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
return -EINVAL;
}
data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
if (data_len < 4) {
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
return -ERANGE;
}
if (data_len % 4) {
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
return -ERANGE;
}
opt_len = sizeof(struct geneve_opt) + data_len;
if (dst) {
struct geneve_opt *opt = dst;
WARN_ON(dst_len < opt_len);
opt->opt_class =
nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
opt->length = data_len / 4; /* length is in units of 4 bytes */
opt->r1 = 0;
opt->r2 = 0;
opt->r3 = 0;
memcpy(opt + 1, data, data_len);
}
return opt_len;
}
static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
int dst_len, struct netlink_ext_ack *extack)
{
int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
const struct nlattr *attr, *head = nla_data(nla);
err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
enc_opts_policy, extack);
if (err)
return err;
nla_for_each_attr(attr, head, len, rem) {
switch (nla_type(attr)) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
opt_len = tunnel_key_copy_geneve_opt(attr, dst,
dst_len, extack);
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
if (dst) {
dst_len -= opt_len;
dst += opt_len;
}
break;
}
}
if (!opts_len) {
NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
return -EINVAL;
}
if (rem > 0) {
NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
return -EINVAL;
}
return opts_len;
}
static int tunnel_key_get_opts_len(struct nlattr *nla,
struct netlink_ext_ack *extack)
{
return tunnel_key_copy_opts(nla, NULL, 0, extack);
}
static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
int opts_len, struct netlink_ext_ack *extack)
{
info->options_len = opts_len;
switch (nla_type(nla_data(nla))) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
info->key.tun_flags |= TUNNEL_GENEVE_OPT;
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
return -EAFNOSUPPORT;
#endif
default:
NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
return -EINVAL;
}
}
static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) },
[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
[TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_TUNNEL_KEY_ENC_TOS] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 },
};
static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
{
if (!p)
return;
if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
#ifdef CONFIG_DST_CACHE
struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
dst_cache_destroy(&info->dst_cache);
#endif
dst_release(&p->tcft_enc_metadata->dst);
}
kfree_rcu(p, rcu);
}
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
__be16 dst_port = 0;
__be64 key_id = 0;
int opts_len = 0;
__be16 flags = 0;
u8 tos, ttl;
int ret = 0;
int err;
if (!nla) {
NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
return -EINVAL;
}
err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
extack);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
return err;
}
if (!tb[TCA_TUNNEL_KEY_PARMS]) {
NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
return -EINVAL;
}
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
if (err < 0)
return err;
exists = err;
if (exists && bind)
return 0;
switch (parm->t_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
break;
case TCA_TUNNEL_KEY_ACT_SET:
if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
__be32 key32;
key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
key_id = key32_to_tunnel_id(key32);
flags = TUNNEL_KEY;
}
flags |= TUNNEL_CSUM;
if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
flags &= ~TUNNEL_CSUM;
if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
extack);
if (opts_len < 0) {
ret = opts_len;
goto err_out;
}
}
tos = 0;
if (tb[TCA_TUNNEL_KEY_ENC_TOS])
tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
ttl = 0;
if (tb[TCA_TUNNEL_KEY_ENC_TTL])
ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
__be32 saddr;
__be32 daddr;
saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
dst_port, flags,
key_id, opts_len);
} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
struct in6_addr saddr;
struct in6_addr daddr;
saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
key_id, 0);
} else {
NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
ret = -EINVAL;
goto err_out;
}
if (!metadata) {
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
ret = -ENOMEM;
goto err_out;
}
#ifdef CONFIG_DST_CACHE
ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL);
if (ret)
goto release_tun_meta;
#endif
if (opts_len) {
ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
&metadata->u.tun_info,
opts_len, extack);
if (ret < 0)
goto release_dst_cache;
}
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
ret = -EINVAL;
goto err_out;
}
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_tunnel_key_ops, bind, true);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
goto release_dst_cache;
}
ret = ACT_P_CREATED;
} else if (!ovr) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
goto release_dst_cache;
}
t = to_tunnel_key(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
ret = -ENOMEM;
exists = true;
goto release_dst_cache;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
spin_lock_bh(&t->tcf_lock);
t->tcf_action = parm->action;
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
spin_unlock_bh(&t->tcf_lock);
tunnel_key_release_params(params_new);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
release_dst_cache:
#ifdef CONFIG_DST_CACHE
if (metadata)
dst_cache_destroy(&metadata->u.tun_info.dst_cache);
#endif
release_tun_meta:
dst_release(&metadata->dst);
err_out:
if (exists)
tcf_idr_release(*a, bind);
else
tcf_idr_cleanup(tn, parm->index);
return ret;
}
static void tunnel_key_release(struct tc_action *a)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
tunnel_key_release_params(params);
}
static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
int len = info->options_len;
u8 *src = (u8 *)(info + 1);
struct nlattr *start;
start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
if (!start)
return -EMSGSIZE;
while (len > 0) {
struct geneve_opt *opt = (struct geneve_opt *)src;
if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
opt->type) ||
nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
opt->length * 4, opt + 1)) {
nla_nest_cancel(skb, start);
return -EMSGSIZE;
}
len -= sizeof(struct geneve_opt) + opt->length * 4;
src += sizeof(struct geneve_opt) + opt->length * 4;
}
nla_nest_end(skb, start);
return 0;
}
static int tunnel_key_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
struct nlattr *start;
int err = -EINVAL;
if (!info->options_len)
return 0;
start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
if (!start)
return -EMSGSIZE;
if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
err = tunnel_key_geneve_opts_dump(skb, info);
if (err)
goto err_out;
} else {
err_out:
nla_nest_cancel(skb, start);
return err;
}
nla_nest_end(skb, start);
return 0;
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
unsigned short family = ip_tunnel_info_af(info);
if (family == AF_INET) {
__be32 saddr = info->key.u.ipv4.src;
__be32 daddr = info->key.u.ipv4.dst;
if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) &&
!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr))
return 0;
}
if (family == AF_INET6) {
const struct in6_addr *saddr6 = &info->key.u.ipv6.src;
const struct in6_addr *daddr6 = &info->key.u.ipv6.dst;
if (!nla_put_in6_addr(skb,
TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) &&
!nla_put_in6_addr(skb,
TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6))
return 0;
}
return -EINVAL;
}
static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
struct tc_tunnel_key opt = {
.index = t->tcf_index,
.refcnt = refcount_read(&t->tcf_refcnt) - ref,
.bindcnt = atomic_read(&t->tcf_bindcnt) - bind,
};
struct tcf_t tm;
spin_lock_bh(&t->tcf_lock);
params = rcu_dereference_protected(t->params,
lockdep_is_held(&t->tcf_lock));
opt.action = t->tcf_action;
opt.t_action = params->tcft_action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
struct ip_tunnel_info *info =
&params->tcft_enc_metadata->u.tun_info;
struct ip_tunnel_key *key = &info->key;
__be32 key_id = tunnel_id_to_key32(key->tun_id);
if (((key->tun_flags & TUNNEL_KEY) &&
nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
tunnel_key_dump_addresses(skb,
&params->tcft_enc_metadata->u.tun_info) ||
(key->tp_dst &&
nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
key->tp_dst)) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
!(key->tun_flags & TUNNEL_CSUM)) ||
tunnel_key_opts_dump(skb, info))
goto nla_put_failure;
if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
goto nla_put_failure;
if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
goto nla_put_failure;
}
tcf_tm_dump(&tm, &t->tcf_tm);
if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
&tm, TCA_TUNNEL_KEY_PAD))
goto nla_put_failure;
spin_unlock_bh(&t->tcf_lock);
return skb->len;
nla_put_failure:
spin_unlock_bh(&t->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
.id = TCA_ID_TUNNEL_KEY,
.owner = THIS_MODULE,
.act = tunnel_key_act,
.dump = tunnel_key_dump,
.init = tunnel_key_init,
.cleanup = tunnel_key_release,
.walk = tunnel_key_walker,
.lookup = tunnel_key_search,
.size = sizeof(struct tcf_tunnel_key),
};
static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
tc_action_net_exit(net_list, tunnel_key_net_id);
}
static struct pernet_operations tunnel_key_net_ops = {
.init = tunnel_key_init_net,
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
};
static int __init tunnel_key_init_module(void)
{
return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}
static void __exit tunnel_key_cleanup_module(void)
{
tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}
module_init(tunnel_key_init_module);
module_exit(tunnel_key_cleanup_module);
MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
MODULE_DESCRIPTION("ip tunnel manipulation actions");
MODULE_LICENSE("GPL v2");