Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree; the most relevant updates in this batch are:

1) Add Maglev support to IPVS. Moreover, store the latest server weight
   in IPVS since this is needed by Maglev, patches from Inju Song. (A
   sketch of Maglev's table population follows this message.)

2) Preparatory work to add iptables flowtable support, patches
   from Felix Fietkau.

3) Hand flows back to the conntrack slow path, via the new teardown
   state, when a TCP RST/FIN packet is seen, also from Felix. (See the
   sketch after this message.)

4) Add support for extended netlink error reporting for nf_tables. (An
   illustrative sketch follows this message.)

5) Support for timeouts larger than 23 days in nf_tables, patch from
   Florian Westphal.

6) Always set an upper limit to dynamic sets, also from Florian.

7) Allow number generator to make map lookups, from Laura Garcia.

8) Use hash_32() instead of open-coded hashing in IPVS, from Vicent
   Bernat. (A user-space sketch of hash_32() follows this message.)

9) Extend ip6tables SRH match to support previous, next and last SID,
   from Ahmed Abdelsalam.

10) Move the passive OS fingerprint code out of xt_osf into nf_osf.c,
   from Fernando Fernandez.

11) Expose nf_conntrack_max through ctnetlink, from Florent Fourcot.

12) Several housekeeping patches for xt_NFLOG, x_tables and ebtables,
   from Taehee Yoo.

13) Unify the bridge meta expression with core nft_meta, then make
   nft_meta built-in. Make rt and exthdr built-in too, again from Florian.

14) Fix missing initialization of tbl->entries in IPVS, from Cong Wang.
====================
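
A rough user-space sketch of the Maglev lookup-table population mentioned
in 1), following the algorithm from Google's Maglev paper that the new
IPVS scheduler implements. Table size, hash functions and server names
below are illustrative assumptions, not the kernel's implementation (the
real scheduler also folds in the per-server weights mentioned above):

    #include <stdio.h>
    #include <string.h>

    #define M 13    /* lookup table size, must be prime */
    #define N 3     /* number of backend servers */

    static unsigned int hash(const char *s, unsigned int seed)
    {
        unsigned int h = seed;

        while (*s)
            h = h * 31 + (unsigned char)*s++;
        return h;
    }

    int main(void)
    {
        const char *server[N] = { "srv-a", "srv-b", "srv-c" };
        unsigned int offset[N], skip[N], next[N] = { 0 };
        int table[M], filled = 0;

        memset(table, -1, sizeof(table));
        for (int i = 0; i < N; i++) {
            /* each server gets its own permutation of the table slots */
            offset[i] = hash(server[i], 0x12345678) % M;
            skip[i] = hash(server[i], 0x87654321) % (M - 1) + 1;
        }

        /* Each server, in round-robin order, claims the next free slot
         * of its own permutation until the table is full; this is what
         * gives Maglev both good balance and minimal disruption. */
        while (filled < M) {
            for (int i = 0; i < N && filled < M; i++) {
                unsigned int c = (offset[i] + next[i] * skip[i]) % M;

                while (table[c] >= 0) {
                    next[i]++;
                    c = (offset[i] + next[i] * skip[i]) % M;
                }
                table[c] = i;
                next[i]++;
                filled++;
            }
        }

        for (int j = 0; j < M; j++)
            printf("slot %2d -> %s\n", j, server[table[j]]);
        return 0;
    }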
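
For 3), the shape of the idea: when the flowtable fast path sees a FIN or
RST, it marks the flow for teardown so the connection falls back to the
regular conntrack path. A hedged sketch only; flow_offload_teardown() is
the helper this batch introduces, but the wrapper function below is
illustrative, not the exact hook code:

    /* If this TCP packet ends the connection, stop offloading the flow:
     * flag it for teardown and let the packet take the slow path, where
     * conntrack tracks the FIN/RST shutdown as usual. */
    static int sample_flow_tcp_check(struct flow_offload *flow,
                                     struct sk_buff *skb,
                                     unsigned int thoff)
    {
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
            return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        if (unlikely(tcph->fin || tcph->rst)) {
            flow_offload_teardown(flow);
            return -1;  /* caller returns NF_ACCEPT: slow path */
        }
        return 0;
    }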
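
For 4), extended error reporting (extack) lets a netlink request handler
point userspace at the exact offending attribute instead of returning a
bare errno. NL_SET_BAD_ATTR() is the stock <linux/netlink.h> macro; the
handler and lookup function below are illustrative, not verbatim
nf_tables code:

    static int sample_gettable(struct sk_buff *skb,
                               const struct nlmsghdr *nlh,
                               const struct nlattr * const nla[],
                               struct netlink_ext_ack *extack)
    {
        const struct nft_table *table;

        /* sample_table_lookup() is a stand-in for the real lookup */
        table = sample_table_lookup(nla[NFTA_TABLE_NAME]);
        if (IS_ERR(table)) {
            /* userspace learns which attribute failed, not just -ENOENT */
            NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
            return PTR_ERR(table);
        }
        return 0;
    }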
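
And for 8), what hash_32() does under the hood, sketched in user space:
one multiply by the 32-bit golden-ratio constant, keeping only the top
bits, which are the best mixed. The constant matches current
<linux/hash.h>; the bucketing example is an illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define GOLDEN_RATIO_32 0x61C88647u  /* as in <linux/hash.h> */

    static inline uint32_t hash_32(uint32_t val, unsigned int bits)
    {
        /* multiplicative hash: take the high `bits` bits of the product */
        return (val * GOLDEN_RATIO_32) >> (32 - bits);
    }

    int main(void)
    {
        uint32_t addr = 0xc0a80001;  /* 192.168.0.1 */

        /* bucket the address into a 256-entry (8-bit) table */
        printf("bucket = %u\n", hash_32(addr, 8));
        return 0;
    }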

Signed-off-by: David S. Miller <davem@davemloft.net>
104 files changed, 2758 insertions(+), 1892 deletions(-)


@@ -300,7 +300,7 @@ ipt_do_table(struct sk_buff *skb,
counter = xt_get_this_cpu_counter(&e->counters);
ADD_COUNTER(*counter, skb->len, 1);
-t = ipt_get_target(e);
+t = ipt_get_target_c(e);
WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)


@@ -47,7 +47,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
static unsigned int
masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
-struct nf_nat_range range;
+struct nf_nat_range2 range;
const struct nf_nat_ipv4_multi_range_compat *mr;
mr = par->targinfo;


@@ -33,8 +33,7 @@ static const struct xt_table nf_nat_ipv4_table = {
static unsigned int iptable_nat_do_chain(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct)
+const struct nf_hook_state *state)
{
return ipt_do_table(skb, state, state->net->ipv4.nat_table);
}


@@ -2,265 +2,12 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct tcphdr *tcph;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
return 0;
}
static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct udphdr *udph;
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
skb_try_make_writable(skb, thoff + sizeof(*udph)))
return -1;
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return 0;
}
static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
unsigned int thoff, __be32 addr,
__be32 new_addr)
{
switch (iph->protocol) {
case IPPROTO_TCP:
if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
case IPPROTO_UDP:
if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
}
return 0;
}
static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
enum flow_offload_tuple_dir dir)
{
struct iphdr *iph = ip_hdr(skb);
unsigned int thoff = iph->ihl * 4;
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
if (flow->flags & FLOW_OFFLOAD_DNAT &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
return 0;
}
static bool ip_has_options(unsigned int thoff)
{
return thoff != sizeof(struct iphdr);
}
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
if (!pskb_may_pull(skb, sizeof(*iph)))
return -1;
iph = ip_hdr(skb);
thoff = iph->ihl * 4;
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
if (iph->protocol != IPPROTO_TCP &&
iph->protocol != IPPROTO_UDP)
return -1;
thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->src_port = ports->source;
tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
return 0;
}
/* Based on ip_exceeds_mtu(). */
static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
return false;
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
}
static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
{
u32 mtu;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (__nf_flow_exceeds_mtu(skb, mtu))
return true;
return false;
}
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
const struct rtable *rt;
struct iphdr *iph;
__be32 nexthop;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*iph)))
return NF_DROP;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ip(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,


@@ -395,7 +395,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
-struct nf_nat_range range;
+struct nf_nat_range2 range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
@@ -497,7 +497,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_callforwarding_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
-struct nf_nat_range range;
+struct nf_nat_range2 range;
/* This must be a fresh one. */
BUG_ON(new->status & IPS_NAT_DONE_MASK);


@@ -63,7 +63,7 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
#endif /* CONFIG_XFRM */
static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
-const struct nf_nat_range *range)
+const struct nf_nat_range2 *range)
{
return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
@@ -143,7 +143,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
-struct nf_nat_range *range)
+struct nf_nat_range2 *range)
{
if (tb[CTA_NAT_V4_MINIP]) {
range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
@@ -246,8 +246,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct))
+const struct nf_hook_state *state))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -285,7 +284,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
-ret = do_chain(priv, skb, state, ct);
+ret = do_chain(priv, skb, state);
if (ret != NF_ACCEPT)
return ret;
@@ -326,8 +325,7 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct))
+const struct nf_hook_state *state))
{
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
@@ -346,8 +344,7 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct))
+const struct nf_hook_state *state))
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
@@ -383,8 +380,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct))
+const struct nf_hook_state *state))
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;


@@ -24,13 +24,13 @@
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-const struct nf_nat_range *range,
+const struct nf_nat_range2 *range,
const struct net_device *out)
{
struct nf_conn *ct;
struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
-struct nf_nat_range newrange;
+struct nf_nat_range2 newrange;
const struct rtable *rt;
__be32 newsrc, nh;


@@ -48,7 +48,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t = {};
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
-struct nf_nat_range range;
+struct nf_nat_range2 range;
struct nf_conn_nat *nat;
nat = nf_ct_nat_ext_add(ct);


@@ -41,7 +41,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
static void
gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
-const struct nf_nat_range *range,
+const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{


@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
static void
icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
-const struct nf_nat_range *range,
+const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{


@@ -28,8 +28,7 @@
static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
-const struct nf_hook_state *state,
-struct nf_conn *ct)
+const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;


@@ -21,7 +21,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_masq *priv = nft_expr_priv(expr);
-struct nf_nat_range range;
+struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
range.flags = priv->flags;