From 38f9a08a3e6a6ad6393c60f82a50bdd0c23478b0 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 10 Oct 2023 22:51:59 +0100 Subject: [PATCH 1/2] sfc: parse mangle actions (NAT) in conntrack entries The MAE can edit either address, L4 port, or both, for either source or destination. These can't be mixed; i.e. it can edit source addr and source port, but not (say) source addr and dest port. Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tc_conntrack.c | 91 ++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c index 44bb57670340..d90206f27161 100644 --- a/drivers/net/ethernet/sfc/tc_conntrack.c +++ b/drivers/net/ethernet/sfc/tc_conntrack.c @@ -276,10 +276,84 @@ static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr, return 0; } +/** + * struct efx_tc_ct_mangler_state - tracks which fields have been pedited + * + * @ipv4: IP source or destination addr has been set + * @tcpudp: TCP/UDP source or destination port has been set + */ +struct efx_tc_ct_mangler_state { + u8 ipv4:1; + u8 tcpudp:1; +}; + +static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn, + const struct flow_action_entry *fa, + struct efx_tc_ct_mangler_state *mung) +{ + /* Is this the first mangle we've processed for this rule? 
*/ + bool first = !(mung->ipv4 || mung->tcpudp); + bool dnat = false; /* set when this mangle edits a destination field */ + + switch (fa->mangle.htype) { /* classify the edit by header type, then by field offset; record its value in conn and flag its kind in mung */ + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + switch (fa->mangle.offset) { + case offsetof(struct iphdr, daddr): + dnat = true; + fallthrough; + case offsetof(struct iphdr, saddr): + if (fa->mangle.mask) /* any nonzero mask is rejected; NOTE(review): presumably zero means the whole address is replaced - confirm */ + return -EOPNOTSUPP; + conn->nat_ip = htonl(fa->mangle.val); + mung->ipv4 = 1; /* address is now held in conn->nat_ip */ + break; + default: /* any other IPv4 offset is not handled */ + return -EOPNOTSUPP; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ + switch (fa->mangle.offset) { + case offsetof(struct tcphdr, dest): + BUILD_BUG_ON(offsetof(struct tcphdr, dest) != + offsetof(struct udphdr, dest)); /* build-time check that the TCP and UDP offsets agree */ + dnat = true; + fallthrough; + case offsetof(struct tcphdr, source): + BUILD_BUG_ON(offsetof(struct tcphdr, source) != + offsetof(struct udphdr, source)); /* build-time check that the TCP and UDP offsets agree */ + if (~fa->mangle.mask != 0xffff) /* the inverted mask must be exactly 0xffff */ + return -EOPNOTSUPP; + conn->l4_natport = htons(fa->mangle.val); + mung->tcpudp = 1; /* port is now held in conn->l4_natport */ + break; + default: /* any other TCP/UDP offset is not handled */ + return -EOPNOTSUPP; + } + break; + default: /* any other header type is not handled */ + return -EOPNOTSUPP; + } + /* first mangle tells us whether this is SNAT or DNAT; + * subsequent mangles must match that (a mismatch returns -EOPNOTSUPP) + */ + if (first) + conn->dnat = dnat; + else if (conn->dnat != dnat) + return -EOPNOTSUPP; + return 0; /* mangle accepted and recorded */ +} + static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, struct flow_cls_offload *tc) { struct flow_rule *fr = flow_cls_offload_flow_rule(tc); + struct efx_tc_ct_mangler_state mung = {}; struct efx_tc_ct_entry *conn, *old; struct efx_nic *efx = ct_zone->efx; const struct flow_action_entry *fa; @@ -326,6 +400,17 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, goto release; } break; + case FLOW_ACTION_MANGLE: + if (conn->eth_proto != htons(ETH_P_IP)) { + netif_dbg(efx, drv, efx->net_dev, + "NAT only supported for IPv4\n"); + rc = -EOPNOTSUPP; + goto release; + } + rc = efx_tc_ct_mangle(efx, conn, fa, 
&mung); + if (rc) + goto release; + break; default: netif_dbg(efx, drv, efx->net_dev, "Unhandled action %u for conntrack\n", fa->id); @@ -335,8 +420,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone, } /* fill in defaults for unmangled values */ - conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip; - conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport; + if (!mung.ipv4) + conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip; + if (!mung.tcpudp) + conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport; cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT); if (IS_ERR(cnt)) { From 0c7fe3b3720ed59219ba3d8079eddc719cb36b35 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 10 Oct 2023 22:52:00 +0100 Subject: [PATCH 2/2] sfc: support offloading ct(nat) action in RHS rules If an IP address and/or L4 port for NAPT is available from a CT match, the MAE will perform the edits; if no CT lookup has been performed for this packet, the CT lookup did not return a match, or the matched CT entry did not include NAPT, the action will have no effect. Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/mae.c | 3 ++- drivers/net/ethernet/sfc/tc.c | 8 ++++++++ drivers/net/ethernet/sfc/tc.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 021980a958b7..10709d828a63 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1291,10 +1291,11 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) size_t outlen; int rc; - MCDI_POPULATE_DWORD_4(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, + MCDI_POPULATE_DWORD_5(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS, MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push, MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop, MAE_ACTION_SET_ALLOC_IN_DECAP, act->decap, + MAE_ACTION_SET_ALLOC_IN_DO_NAT, act->do_nat, MAE_ACTION_SET_ALLOC_IN_DO_DECR_IP_TTL, act->do_ttl_dec); diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 3d76b7598631..6db3d7ed3a86 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -2457,6 +2457,14 @@ static int efx_tc_flower_replace(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); rc = -EOPNOTSUPP; goto release; + case FLOW_ACTION_CT: + if (fa->ct.action != TCA_CT_ACT_NAT) { + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_FMT_MOD(extack, "Can only offload CT 'nat' action in RHS rules, not %d", fa->ct.action); + goto release; + } + act->do_nat = 1; + break; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 86e38ea7988c..7b5190078bee 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -48,6 +48,7 @@ struct efx_tc_encap_action; /* see tc_encap_actions.h */ * @vlan_push: the number of vlan headers to push * @vlan_pop: the number of vlan headers to pop * @decap: used to indicate a tunnel header decapsulation should take place + 
* @do_nat: perform NAT/NPT with values returned by conntrack match * @do_ttl_dec: used to indicate IP TTL / Hop Limit should be decremented * @deliver: used to indicate a deliver action should take place * @vlan_tci: tci fields for vlan push actions @@ -68,6 +69,7 @@ struct efx_tc_action_set { u16 vlan_push:2; u16 vlan_pop:2; u16 decap:1; + u16 do_nat:1; u16 do_ttl_dec:1; u16 deliver:1; __be16 vlan_tci[2];